Source code for cardinal_pythonlib.sqlalchemy.list_types

#!/usr/bin/env python
# cardinal_pythonlib/sqlalchemy/list_types.py

"""
===============================================================================

    Original code copyright (C) 2009-2022 Rudolf Cardinal (rudolf@pobox.com).

    This file is part of cardinal_pythonlib.

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        https://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.

===============================================================================

**SQLAlchemy type classes to store different kinds of lists in a database.**

"""

import csv
from io import StringIO
from typing import List, Optional

from cardinal_pythonlib.logs import get_brace_style_log_with_null_handler
from sqlalchemy.engine.interfaces import Dialect
from sqlalchemy.sql.sqltypes import Text, UnicodeText
from sqlalchemy.sql.type_api import TypeDecorator

log = get_brace_style_log_with_null_handler(__name__)


# =============================================================================
# StringListType
# =============================================================================


[docs]class StringListType(TypeDecorator): r""" Store a list of strings as CSV. (Rather less arbitrary in its encoding requirements than e.g. https://sqlalchemy-utils.readthedocs.io/en/latest/_modules/sqlalchemy_utils/types/scalar_list.html#ScalarListType.) - 2019-01-01: removed trailing ``\r\n`` (via ``lineterminator=""``). Some related test code: .. code-block:: python import csv from io import StringIO pythonlist = [None, 1, "string", "commas, within string", "line 1\nline2"] output_1 = StringIO() wr_1 = csv.writer(output_1, quoting=csv.QUOTE_ALL) # appends '\r\n' wr_1.writerow(pythonlist) csvstring_1 = output_1.getvalue() print(repr(csvstring_1)) backtopython_1 = list(csv.reader([csvstring_1]))[0] print(repr(backtopython_1)) output_2 = StringIO() wr_2 = csv.writer(output_2, quoting=csv.QUOTE_ALL, lineterminator="") wr_2.writerow(pythonlist) csvstring_2 = output_2.getvalue() print(repr(csvstring_2)) backtopython_2 = list(csv.reader([csvstring_2]))[0] print(repr(backtopython_2)) assert len(csvstring_1) > len(csvstring_2) assert backtopython_1 == backtopython_2 So: - The newline terminator is obviously unnecessary for something that will always be a single CSV line. - Eliminating it saves two bytes and adds clarity in the database representation. - Eliminating it keeps the system back-compatible, since the reader happily reads things without the line terminator. - **NOTE** in particular that this does not behave completely like a plain Python list on the Python side, as follows. - When an ORM object is created, the default value on the Python side is ``None``. - The SQLAlchemy ``default`` option is invoked at ``INSERT``, not at ORM object creation; see https://docs.sqlalchemy.org/en/latest/core/metadata.html#sqlalchemy.schema.Column.params.default. - The SQLAlchemy ``server_default`` is the DDL ``DEFAULT`` value, not a Python default. - On database load, everything is fine (as ``process_result_value`` will be called, which can translate a database ``NULL`` to a Python ``[]``). - So that means that **if you want the field to be a list rather than None from the outset,** you must set it to ``[]`` from ``__init__()``. - Secondly, SQLAlchemy makes its columns behave in a special way **upon assignment**. So, in particular, ``mylist.append(value)`` will not itself mark the field as "dirty" and in need of writing to the database. - Internally, support we define (on the class) ``mycol = Column(Integer)``, and then create an instance via ``instance = cls()``. - Then ``cls.mycol`` will actually be of type :class:`sqlalchemy.orm.attributes.InstrumentedAttribute`, and ``instance.mycol`` will be of type ``int`` (or ``NoneType`` if it's ``None``). .. code-block:: python from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.sql.schema import Column from sqlalchemy.sql.sqltypes import Integer Base = declarative_base() class MyClass(Base): __tablename__ = "mytable" pk = Column(Integer, primary_key=True) mycol = Column(Integer) instance = MyClass() type(MyClass.pk) # <class 'sqlalchemy.orm.attributes.InstrumentedAttribute'> type(instance.pk) # <class 'NoneType'> - The class :class:`sqlalchemy.orm.attributes.InstrumentedAttribute` implements :meth:`__set__`, :meth:`__delete__`, and :meth:`__get__`. This means that when you write ``instance.mycol = 5``, it calls the ``__set__()`` function; see https://docs.python.org/3.7/howto/descriptor.html. - So, for a list (e.g. ``mylist = Column(StringListType)``, if you write ``mylist = [value1, value2]``, it will call the appropriate ``__set__()`` function and mark the field as "dirty" (see e.g. :meth:`sqlalchemy.orm.attributes.ScalarAttributeImpl.set`). **But** if ``mylist`` is already a list and you write ``mylist.append(value)``, the ``__set__()`` function won't be called. - If you haven't yet written the instance to the database, this doesn't matter; "new" values are considered dirty and are written to the database fine. But if you (a) create, (b) save, and then (c) append to a list, the change won't be noticed. Since SQLAlchemy can save objects for you as soon as another object needs to know it's PK, the fact that (b) has happened may not be obvious. - Therefore, in short, **beware append() and use assignment** for these sorts of lists, if this might apply; e.g. ``mylist = mylist + [value]``. - Don't use ``+=``, either; that calls ``list.__iadd__()`` and modifies the existing list, rather than calling ``InstrumentedAttribute.__set__()``. - So one method is to ignore ``__init__()`` (meaning new instances will have the list-type field set to ``None``) and then using this sort of access function: .. code-block:: python def add_to_mylist(self, text: str) -> None: if self.mylist is None: self.mylist = [text] else: # noinspection PyAugmentAssignment self.mylist = self.mylist + [text] # not "append()", not "+=" """ # noqa impl = UnicodeText() @property def python_type(self): return list @staticmethod def _strlist_to_dbstr(strlist: Optional[List[str]]) -> str: if not strlist: return "" output = StringIO() wr = csv.writer(output, quoting=csv.QUOTE_ALL, lineterminator="") wr.writerow(strlist) return output.getvalue() @staticmethod def _dbstr_to_strlist(dbstr: Optional[str]) -> List[str]: if not dbstr: return [] try: return list(csv.reader([dbstr]))[0] # ... list( generator( list_of_lines ) )[first_line] except csv.Error: log.warning( "StringListType: Unable to convert database value of " "{!r} to Python; returning empty list", dbstr, ) return []
[docs] def process_bind_param( self, value: Optional[List[str]], dialect: Dialect ) -> str: """Convert things on the way from Python to the database.""" retval = self._strlist_to_dbstr(value) return retval
[docs] def process_literal_param( self, value: Optional[List[str]], dialect: Dialect ) -> str: """Convert things on the way from Python to the database.""" retval = self._strlist_to_dbstr(value) return retval
# Could also use "process_literal_param = process_bind_param" # or vice versa, but this adds some clarity via docstrings.
[docs] def process_result_value( self, value: Optional[str], dialect: Dialect ) -> List[str]: """Convert things on the way from the database to Python.""" retval = self._dbstr_to_strlist(value) return retval
# ============================================================================= # IntListType # =============================================================================
[docs]class IntListType(TypeDecorator): """ Store a list of integers as CSV. **Note:** see :class:`StringListType` for a general discussion about SQLAlchemy types where the Python representation is a list; they can seem slightly unusual. """ impl = Text() @property def python_type(self): return list @staticmethod def _intlist_to_dbstr(intlist: Optional[List[int]]) -> str: if not intlist: return "" return ",".join(str(x) for x in intlist) @staticmethod def _dbstr_to_intlist(dbstr: Optional[str]) -> List[int]: if not dbstr: return [] try: return [int(x) for x in dbstr.split(",")] except (TypeError, ValueError): log.warning( "IntListType: Unable to convert database value of {!r}" " to Python; returning empty list", dbstr, ) return []
[docs] def process_bind_param( self, value: Optional[List[int]], dialect: Dialect ) -> str: """Convert things on the way from Python to the database.""" retval = self._intlist_to_dbstr(value) return retval
[docs] def process_literal_param( self, value: Optional[List[int]], dialect: Dialect ) -> str: """Convert things on the way from Python to the database.""" retval = self._intlist_to_dbstr(value) return retval
[docs] def process_result_value( self, value: Optional[str], dialect: Dialect ) -> List[int]: """Convert things on the way from the database to Python.""" retval = self._dbstr_to_intlist(value) return retval