#!/usr/bin/env python
# cardinal_pythonlib/chebi.py
"""
===============================================================================
Original code copyright (C) 2009-2022 Rudolf Cardinal (rudolf@pobox.com).
This file is part of cardinal_pythonlib.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
===============================================================================
**Functions to assist with the ChEBI database.**
ChEBI: Chemical Entities of Biological Interest (ChEBI) database from
EMBL-EBI (European Molecular Biology Laboratory / European Bioinformatics
Institute).
See https://www.ebi.ac.uk/chebi/
Examples:
.. code-block:: python
cardinalpythonlib_chebi test
cardinalpythonlib_chebi search citalopram
cardinalpythonlib_chebi search citalopram --exact_search
cardinalpythonlib_chebi search zopiclone
cardinalpythonlib_chebi search zopiclone --exact_search
cardinalpythonlib_chebi search zopiclone --exact_match
cardinalpythonlib_chebi search salicylic --inexact_search
cardinalpythonlib_chebi describe citalopram simvastatin --exact_match
cardinalpythonlib_chebi ancestors citalopram simvastatin
Then try this syntax:
.. code-block:: bash
cardinalpythonlib_chebi categorize \
--entities entities.txt \
--entity_synonyms entity_synonyms.txt \
--categories categories.txt \
--category_synonyms category_synonyms.txt \
--manual_categories manual_categories.txt \
--results results.csv
using files like these:
.. code-block:: none
# entities.txt
# Things to classify.
agomelatine
aspirin
citalopram
simvastatin
.. code-block:: none
# entity_synonyms.txt
# Renaming of entities prior to lookup.
# Find these via "cardinalpythonlib_chebi search ..." or Google with "CHEBI".
aspirin, acetylsalicylic acid
.. code-block:: none
# categories.txt
# Categories to detect, in order of priority (high to low).
serotonin reuptake inhibitor
antidepressant
antilipemic drug
non-steroidal anti-inflammatory drug
.. code-block:: none
# category_synonyms.txt
# Categories that are equivalent but ChEBI doesn't know.
glucagon-like peptide-1 receptor agonist, hypoglycemic agent
.. code-block:: none
# manual_categories.txt
# Categorizations that ChEBI doesn't know.
agomelatine, antidepressant
""" # noqa: E501
import argparse
import csv
import logging
from typing import List, Generator, Optional, Sequence, Set, Tuple, Union
from appdirs import user_cache_dir
try:
# noinspection PyPackageRequirements
from libchebipy import (
ChebiEntity,
Relation,
search,
set_download_cache_path,
)
except ImportError:
raise ImportError(
"Cannot import libchebipy; try the command: pip install libChEBIpy"
)
from cardinal_pythonlib.file_io import (
gen_lines_without_comments,
get_lines_without_comments,
)
from cardinal_pythonlib.logs import main_only_quicksetup_rootlogger
from cardinal_pythonlib.version_string import VERSION_STRING
log = logging.getLogger(__name__)
# =============================================================================
# Constants
# =============================================================================
# Default location for cached ChEBI files: main() offers this as the default
# of its --cachepath option and hands the chosen path to
# set_download_cache_path().
DEFAULT_CACHE_PATH = user_cache_dir("chebi")
# Relationship types that are followed, by default, when walking from an
# entity to its ancestors.
DEFAULT_ANCESTOR_RELATIONSHIPS = ["has_role", "is_a"]  # both are helpful
# Default for the "exact_search" option (passed as the "exact" argument of
# libchebipy.search).
DEFAULT_EXACT_SEARCH = False
# Default for the "exact_match" option (post-filter search results so that
# only those matching the search term exactly are kept).
DEFAULT_EXACT_MATCH = False
# =============================================================================
# Hashable version of ChebiEntity
# =============================================================================
# Textual prefix of a full ChEBI ID string (e.g. "CHEBI:15903"); it is removed
# to obtain the bare ID number.
_CHEBI_ID_PREFIX = "CHEBI:"
def get_chebi_id_number_str(entity: ChebiEntity) -> str:
    """
    Returns an entity's ChEBI ID with the ``CHEBI:`` prefix removed, as a
    string.

    Args:
        entity:
            a :class:`libchebipy.ChebiEntity`

    Returns:
        str: the result of ``entity.get_id()`` with every occurrence of the
        ``CHEBI:`` prefix deleted
    """
    full_id = entity.get_id()
    return full_id.replace(_CHEBI_ID_PREFIX, "")
def get_chebi_id_number(entity: ChebiEntity) -> int:
    """
    Returns an entity's ChEBI ID number as an integer.

    Args:
        entity:
            a :class:`libchebipy.ChebiEntity`

    Returns:
        int: the prefix-free ID (see :func:`get_chebi_id_number_str`)
        converted with :func:`int`
    """
    id_digits = get_chebi_id_number_str(entity)
    return int(id_digits)
class HashableChebiEntity(ChebiEntity):
    """
    Hashable version of :class:`libchebipy.ChebiEntity`.

    Instances hash by their numeric ChEBI ID, which lets them be stored in
    sets and used as dictionary keys (as the ancestor-finding code does to
    remember which entities it has already visited).

    Equality is deliberately permissive:

    - compared with a ``str``, the entity is equal if the string is its name;
    - compared with an ``int``, it is equal if the integer is its ID number;
    - compared with another ChEBI entity, it is equal if the ID numbers match.

    Note that only entity-to-entity equality is consistent with
    :meth:`__hash__`; a name string that compares equal to an entity does not
    hash like it.
    """

    @classmethod
    def from_chebi_entity(cls, entity: ChebiEntity) -> "HashableChebiEntity":
        """
        Alternate constructor: builds a hashable entity with the same ChEBI
        ID as ``entity``. Uses ``cls`` so that subclasses get instances of
        their own type.
        """
        return cls(get_chebi_id_number_str(entity))

    def get_id_number_str(self) -> str:
        """
        Returns this entity's ID number (without the ``CHEBI:`` prefix) as a
        string.
        """
        return get_chebi_id_number_str(self)

    def get_id_number(self) -> int:
        """
        Returns this entity's ID number as an integer.
        """
        return get_chebi_id_number(self)

    def __eq__(self, other: object) -> bool:
        if isinstance(other, str):
            return other == self.get_name()
        if isinstance(other, int):
            return other == self.get_id_number()
        if isinstance(other, ChebiEntity):
            # Works for plain ChebiEntity objects too, which do not have a
            # get_id_number() method of their own.
            return self.get_id_number() == get_chebi_id_number(other)
        # Unknown type: let Python try the reflected comparison and fall back
        # to "not equal", rather than raising AttributeError.
        return NotImplemented

    def __hash__(self) -> int:
        return self.get_id_number()
# =============================================================================
# Descriptions of a ChebiEntity
# =============================================================================
def brief_description(entity: ChebiEntity) -> str:
    """
    Produces a one-line label for an entity.

    Args:
        entity:
            a :class:`ChebiEntity`

    Returns:
        str: the entity's name followed by its ID in parentheses
    """
    name = entity.get_name()
    chebi_id = entity.get_id()
    return f"{name} ({chebi_id})"
# =============================================================================
# Searching ChEBI
# =============================================================================
def get_entity(chebi_id: Union[int, str]) -> ChebiEntity:
    """
    Looks up a single ChEBI entity from its ID.

    Args:
        chebi_id:
            integer ChEBI ID like ``15903``, or string ID like ``'15903'``,
            or string ID like ``'CHEBI:15903'``.

    Returns:
        the corresponding :class:`ChebiEntity`
    """
    # The constructor is always given a string: an int is not acceptable to
    # it, despite what the library's demo code suggests.
    id_as_str = str(chebi_id)
    log.debug(f"Looking up ChEBI ID: {id_as_str}")
    return ChebiEntity(id_as_str)
def search_entities(
    search_term: Union[int, str],
    exact_search: bool = DEFAULT_EXACT_SEARCH,
    exact_match: bool = DEFAULT_EXACT_MATCH,
) -> List[ChebiEntity]:
    """
    Searches ChEBI and returns the matching entities.

    Name comparisons made by the ``exact_match`` filter are case-insensitive.

    Args:
        search_term:
            String or integer to search for.
        exact_search:
            The ``exact`` parameter to :func:`libchebipy.search`.
        exact_match:
            Keep only results that match the search term exactly: by ID
            number if the term is an integer, or by (lower-cased) name if it
            is a string.
            Example: an exact search for "zopiclone" gives both "zopiclone
            (CHEBI:32315)" and "(5R)-zopiclone (CHEBI:53762)"; this option
            filters to the first.

    Returns:
        the search results, filtered if ``exact_match`` was requested
    """
    log.debug(
        f"Searching for {search_term!r} "
        f"(exact_search={exact_search}, exact_match={exact_match})"
    )
    found = search(search_term, exact=exact_search)
    log.debug(
        f"libchebipy.search({search_term!r}, exact={exact_search}) "
        f"-> {found!r}"
    )

    if exact_match:
        # Choose the acceptance test once, according to the term's type.
        if isinstance(search_term, int):

            def is_wanted(candidate: ChebiEntity) -> bool:
                return get_chebi_id_number(candidate) == search_term

        else:
            assert isinstance(search_term, str)
            wanted_name = search_term.lower()

            def is_wanted(candidate: ChebiEntity) -> bool:
                return candidate.get_name().lower() == wanted_name

        found = [candidate for candidate in found if is_wanted(candidate)]

    log.debug(
        f"search_entities({search_term!r}, exact_search={exact_search}, "
        f"exact_match={exact_match}) -> {found!r}"
    )
    return found
# =============================================================================
# Describing ChEBI entries
# =============================================================================
def describe_entity(entity: ChebiEntity) -> None:
    """
    Test function: writes a description of a ChEBI entity to the log.

    The report gives the entity's name, then one line per outgoing relation
    and one line per incoming relation, each naming the relation type and the
    entity at the other end of the link.

    Args:
        entity:
            a :class:`ChebiEntity`
    """
    name = entity.get_name()

    def other_end(relation: Relation) -> ChebiEntity:
        # The entity that this relation links to/from, as identified by the
        # relation's "target" ID.
        return ChebiEntity(relation.get_target_chebi_id())

    outgoing = [(rel, other_end(rel)) for rel in entity.get_outgoings()]
    out_lines = [
        f" • {name} {rel.get_type()} {brief_description(target)}"
        for rel, target in outgoing
    ]

    incoming = [(rel, other_end(rel)) for rel in entity.get_incomings()]
    in_lines = [
        f" • {brief_description(target)} {rel.get_type()} {name}"
        for rel, target in incoming
    ]

    report_lines = [
        name,
        f" ► OUTGOING ({len(out_lines)})",
        *out_lines,
        f" ► INCOMING ({len(in_lines)})",
        *in_lines,
    ]
    report = "\n".join(report_lines)
    log.info(f"{entity.get_id()}:\n{report}")
def search_and_describe(
    search_term: Union[int, str],
    exact_search: bool = DEFAULT_EXACT_SEARCH,
    exact_match: bool = DEFAULT_EXACT_MATCH,
) -> None:
    """
    Searches for a ChEBI term and describes every match to the log.

    Args:
        search_term: search term
        exact_search: exact search?
        exact_match: exact match?
    """
    for match in search_entities(
        search_term, exact_search=exact_search, exact_match=exact_match
    ):
        describe_entity(match)
def search_and_describe_multiple(
    search_terms: List[Union[int, str]],
    exact_search: bool = DEFAULT_EXACT_SEARCH,
    exact_match: bool = DEFAULT_EXACT_MATCH,
) -> None:
    """
    Runs :func:`search_and_describe` for each of several search terms, in
    order.

    Args:
        search_terms: search term(s)
        exact_search: exact search?
        exact_match: exact match?
    """
    options = dict(exact_search=exact_search, exact_match=exact_match)
    for term in search_terms:
        search_and_describe(term, **options)
def search_and_list(
    search_term: Union[int, str],
    exact_search: bool = DEFAULT_EXACT_SEARCH,
    exact_match: bool = DEFAULT_EXACT_MATCH,
) -> None:
    """
    Searches for a ChEBI term and writes a brief list of the matches (one
    name/ID pair per line) to the log.

    Args:
        search_term: search term
        exact_search: exact search?
        exact_match: exact match?
    """
    matches = search_entities(
        search_term, exact_search=exact_search, exact_match=exact_match
    )
    report = "\n".join(f"– {brief_description(match)}" for match in matches)
    log.info(f"Results:\n{report}")
def search_and_list_multiple(
    search_terms: List[Union[int, str]],
    exact_search: bool = DEFAULT_EXACT_SEARCH,
    exact_match: bool = DEFAULT_EXACT_MATCH,
) -> None:
    """
    Runs :func:`search_and_list` for each of several search terms, in order.

    Args:
        search_terms: search term(s)
        exact_search: exact search?
        exact_match: exact match?
    """
    options = dict(exact_search=exact_search, exact_match=exact_match)
    for term in search_terms:
        search_and_list(term, **options)
# =============================================================================
# Ancestors and descendants of ChEBI entities
# =============================================================================
def gen_ancestor_info(
    entity: ChebiEntity,
    relationships: Optional[List[str]] = None,
    max_generations: Optional[int] = None,
    starting_generation_: int = 0,
    seen_: Optional[Set[HashableChebiEntity]] = None,
) -> Generator[Tuple[HashableChebiEntity, str, int], None, None]:
    """
    Generates all ancestors of an entity (following "outgoing" links),
    depth-first.

    Each ancestor is yielded at most once, the first time it is reached. The
    generation number reported is therefore the depth of the path by which it
    was first found, which is not necessarily the shortest path. For the same
    reason, when ``max_generations`` is set, an ancestor first reached at the
    generation limit is not explored further even if a shorter route to it
    exists.

    Args:
        entity:
            starting entity
        relationships:
            list of valid relationship types, e.g. "has_role"; if ``None`` or
            empty, ``DEFAULT_ANCESTOR_RELATIONSHIPS`` is used
        max_generations:
            maximum number of generations to pursue, or ``None`` for unlimited
        starting_generation_:
            for internal use only, for recursion
        seen_:
            for internal use only, for recursion

    Yields:
        tuples ``entity, relationship, n_generations_above_start``
    """
    if max_generations is not None and starting_generation_ >= max_generations:
        return
    assert starting_generation_ == 0 or seen_ is not None
    if seen_ is None:
        # Test for None explicitly: "seen_ or set()" would silently replace
        # an empty set supplied by the caller with a different object.
        seen_ = set()
    relationships = relationships or DEFAULT_ANCESTOR_RELATIONSHIPS
    log.debug(
        f"Finding ancestors of {brief_description(entity)} "
        f"(generation {starting_generation_}) "
        f"via relationships {relationships!r}"
    )
    next_generation = starting_generation_ + 1
    for rel in entity.get_outgoings():  # type: Relation
        rel_type = rel.get_type()
        if rel_type not in relationships:
            continue
        target = HashableChebiEntity(rel.get_target_chebi_id())
        log.debug(f"... found {brief_description(target)}")
        if target in seen_:
            # Already reported via another path; also guards against cycles.
            continue
        seen_.add(target)
        yield target, rel_type, next_generation
        yield from gen_ancestor_info(
            entity=target,
            relationships=relationships,
            # The limit must be propagated, or it is only ever applied to the
            # outermost call.
            max_generations=max_generations,
            starting_generation_=next_generation,
            seen_=seen_,
        )
def gen_ancestors(
    entity: ChebiEntity,
    relationships: List[str] = None,
    max_generations: int = None,
) -> Generator[HashableChebiEntity, None, None]:
    """
    Generates the ancestors of an entity, exactly as
    :func:`gen_ancestor_info` finds them, but yielding only the ancestor
    entities (the relationship and generation details are dropped).
    """
    ancestor_info = gen_ancestor_info(entity, relationships, max_generations)
    yield from (
        ancestor for ancestor, _relationship, _generation in ancestor_info
    )
def report_ancestors(
    entity: ChebiEntity,
    relationships: List[str] = None,
    max_generations: int = None,
) -> None:
    """
    Finds the ancestors of an entity, e.g. via "is_a" relationships, and
    writes them to the log as an indented tree-style report (one line per
    ancestor, indented by its generation). See :func:`gen_ancestor_info`.
    """
    relationships = relationships or DEFAULT_ANCESTOR_RELATIONSHIPS
    # Collect everything first, so the search is complete before the report
    # is assembled.
    ancestor_info = list(
        gen_ancestor_info(
            entity=entity,
            relationships=relationships,
            max_generations=max_generations,
        )
    )
    lines = [f"{entity.get_name()} ({entity.get_id()})"]
    lines.extend(
        f"{' ' * generation}► {relationship} "
        f"{brief_description(ancestor)} [{generation}]"
        for ancestor, relationship, generation in ancestor_info
    )
    report = "\n".join(lines)
    log.info(f"Ancestors via {relationships!r}:\n{report}")
def report_ancestors_multiple(
    entity_names: List[str],
    relationships: List[str] = None,
    max_generations: int = None,
) -> None:
    """
    Looks up each named entity (using the default search options of
    :func:`search_entities`) and reports the ancestors of every entity found,
    e.g. via "is_a" relationships. See :func:`report_ancestors` and
    :func:`gen_ancestor_info`.
    """
    log.debug(f"Using ancestor relationships: {relationships!r}")
    log.debug(f"Using max_generations: {max_generations!r}")
    for name in entity_names:
        matches = search_entities(name)
        for match in matches:
            report_ancestors(match, relationships, max_generations)
# =============================================================================
# Testing
# =============================================================================
def testfunc1() -> None:
    """
    Exercises the ChEBI interface: describes an entity fetched by ID,
    describes search results, reports ancestors, and lists/describes the
    results of a category search. All output goes to the log.
    """
    log.info("Testing: describe beta-D-glucose")
    describe_entity(get_entity(15903))
    # Cross-check:
    # https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:15903
    # ... correct.

    log.info("Testing: describe things like 'citalopram'")
    search_and_describe("citalopram", exact_search=False)

    log.info("Testing: show ancestors of citalopram")
    report_ancestors(get_entity(3723))
    # https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:3723

    sri = "serotonin uptake inhibitor"
    for verb, action in (
        ("list", search_and_list),
        ("describe", search_and_describe),
    ):
        log.info(f"Testing: search/{verb} {sri!r}")
        action(sri)
# =============================================================================
# Mapping terms via dictionaries
# =============================================================================
class CaseInsensitiveDict(dict):
    """
    Case-insensitive dictionary for strings; see
    https://stackoverflow.com/questions/2082152/case-insensitive-dictionary

    Keys are converted to lower case when stored and when looked up, so
    ``d["Aspirin"]``, ``d["ASPIRIN"]`` and ``d.get("aspirin")`` all refer to
    the same entry. Values are stored unchanged.

    The built-in :class:`dict` methods do not route through an overridden
    ``__getitem__``/``__setitem__``, so every key-taking method used for
    access is overridden here explicitly.
    """

    def __init__(self, *args, **kwargs) -> None:
        # Route any initial contents through update(), so that their keys are
        # lower-cased like all others.
        super().__init__()
        self.update(*args, **kwargs)

    def __setitem__(self, key: str, value: str) -> None:
        # https://docs.python.org/3/reference/datamodel.html#object.__setitem__
        super().__setitem__(key.lower(), value)

    def __contains__(self, key: str) -> bool:
        # https://docs.python.org/3/reference/datamodel.html#object.__contains__
        return super().__contains__(key.lower())

    def __getitem__(self, key: str) -> str:
        # https://docs.python.org/3/reference/datamodel.html#object.__getitem__
        return super().__getitem__(key.lower())

    def __delitem__(self, key: str) -> None:
        # https://docs.python.org/3/reference/datamodel.html#object.__delitem__
        super().__delitem__(key.lower())

    def get(self, key: str, default: Optional[str] = None) -> Optional[str]:
        """
        Case-insensitive version of :meth:`dict.get`.
        """
        return super().get(key.lower(), default)

    def pop(self, key: str, *default):
        """
        Case-insensitive version of :meth:`dict.pop`; raises ``KeyError`` if
        the key is absent and no default is given.
        """
        return super().pop(key.lower(), *default)

    def setdefault(self, key: str, default: Optional[str] = None):
        """
        Case-insensitive version of :meth:`dict.setdefault`.
        """
        return super().setdefault(key.lower(), default)

    def update(self, *args, **kwargs) -> None:
        """
        Case-insensitive version of :meth:`dict.update`; accepts the same
        arguments (a mapping or iterable of pairs, and/or keyword arguments).
        """
        for key, value in dict(*args, **kwargs).items():
            self[key] = value
def read_dict(filename: str) -> CaseInsensitiveDict:
    """
    Reads a two-column, comma-separated file (which may contain comments)
    into a case-insensitive dictionary. The expected format is

    .. code-block:: none

        a1, b1
        a2, b2
        ...

    Whitespace around each item is removed. Any line that does not split
    into exactly two comma-separated parts is reported via the log as an
    error and otherwise ignored.

    Args:
        filename:
            filename to read

    Returns:
        dict: mapping the first column (converted to lower case) to the second
        (case left intact).
    """
    mapping = CaseInsensitiveDict()
    for line in gen_lines_without_comments(filename):
        fields = [field.strip() for field in line.split(",")]
        if len(fields) != 2:
            log.error(f"Bad CSV-pair line: {line!r}")
            continue
        key, value = fields
        mapping[key] = value
    return mapping
def translate(term: str, mapping: CaseInsensitiveDict) -> Tuple[str, bool]:
    """
    Translates a term through a dictionary. If the term (once converted to
    lower case) is in the dictionary (see :func:`read_dict`), the mapped term
    is returned; otherwise the original search term is returned.

    Args:
        term:
            term to look up
        mapping:
            the mapping dictionary

    Returns:
        tuple: result (str), renamed? (bool). The "renamed" flag is true
        whenever the result differs from ``term`` in any way, including by
        case alone.
    """
    # Use the mapping's own "in" and "[]" operators, which
    # CaseInsensitiveDict makes case-insensitive. An inherited dict.get()
    # does not go through __getitem__, so it would perform a case-sensitive
    # lookup and miss any term containing upper-case letters.
    result = mapping[term] if term in mapping else term
    return result, result != term
# =============================================================================
# Categorizing drugs
# =============================================================================
def get_category(
    entity_name: str,
    categories: Sequence[str],
    entity_synonyms: CaseInsensitiveDict = None,
    category_synonyms: CaseInsensitiveDict = None,
    manual_categories: CaseInsensitiveDict = None,
    relationships: List[str] = None,
) -> Optional[str]:
    """
    Chooses a category for a named entity.

    The steps are, in order: (1) a manual categorization for the name as
    given; (2) renaming of the entity via ``entity_synonyms`` and, if that
    changed the name, a manual categorization for the new name; (3) an exact
    ChEBI lookup of the (possibly renamed) entity, whose ancestors' names are
    compared with the permissible categories. Category names are passed
    through ``category_synonyms`` wherever they are used.

    Args:
        entity_name:
            name of entity to categorize
        categories:
            permissible categories (earlier preferable to later)
        entity_synonyms:
            map to rename entities
        category_synonyms:
            mapping of categories to other (preferred) categories
        manual_categories:
            manual overrides mapping entity to category
        relationships:
            list of valid relationship types defining ancestry, e.g. "has_role"

    Returns:
        chosen category, or ``None`` if none found
    """
    entity_synonyms = entity_synonyms or CaseInsensitiveDict()
    category_synonyms = category_synonyms or CaseInsensitiveDict()
    manual_categories = manual_categories or CaseInsensitiveDict()
    relationships = relationships or DEFAULT_ANCESTOR_RELATIONSHIPS
    log.debug(f"get_category: entity_name={entity_name!r}")

    def manual_category_for(name: str) -> str:
        # Manual override for "name" (which must be a known key), translated
        # to its preferred category name.
        chosen, _ = translate(manual_categories[name], category_synonyms)
        log.debug(f"Manual categorization: {name} → {chosen}")
        return chosen

    # Manual override for the name as supplied?
    if entity_name in manual_categories:
        return manual_category_for(entity_name)

    # Apply any renaming; if that changed the name, check for a manual
    # override under the new name.
    lookup_name, renamed = translate(entity_name, entity_synonyms)
    if renamed and lookup_name in manual_categories:
        return manual_category_for(lookup_name)

    # Look the entity up in ChEBI.
    entities = search_entities(
        lookup_name, exact_search=True, exact_match=True
    )
    if not entities:
        log.warning(f"No entity found for {lookup_name!r}")
        return None
    if len(entities) > 1:
        descriptions = "; ".join(brief_description(e) for e in entities)
        log.warning(
            f"Multiple entities found for {lookup_name!r}; "
            f"using the first. They were:\n{descriptions}"
        )
    entity = entities[0]

    # Names of all ancestors, expressed as preferred category names.
    ancestor_categories = {
        translate(ancestor.get_name(), category_synonyms)[0]
        for ancestor in gen_ancestors(entity, relationships=relationships)
    }

    # The first permissible category (in priority order) that is among the
    # ancestors wins.
    preferred_categories = (
        translate(category, category_synonyms)[0] for category in categories
    )
    return next(
        (
            category
            for category in preferred_categories
            if category in ancestor_categories
        ),
        None,
    )
def _read_optional_dict(
    filename: Optional[str], description: str
) -> CaseInsensitiveDict:
    """
    Loads a mapping file via :func:`read_dict` if a filename was given,
    otherwise returns an empty mapping; logs what is being used either way.

    Args:
        filename:
            name of the file to read, or ``None``/empty for "no file"
        description:
            human-readable name of the mapping, used in log messages
    """
    if filename:
        log.info(f"Reading {description} from {filename}")
        mapping = read_dict(filename)
    else:
        mapping = CaseInsensitiveDict()
    log.debug(f"Using {description}: {mapping!r}")
    return mapping


def categorize_from_file(
    entity_filename: str,
    category_filename: str,
    results_filename: str,
    entity_synonyms_filename: str = None,
    category_synonyms_filename: str = None,
    manual_categories_filename: str = None,
    relationships: List[str] = None,
    output_dialect: str = "excel",
    headers: bool = True,
) -> None:
    """
    Categorizes entities listed in a file, writing the results as CSV.

    Each distinct entity (compared case-insensitively; later duplicates are
    ignored with a warning) produces one output row of ``entity, category``.
    Entities for which no category is found are logged as errors and written
    with an empty category.

    Args:
        entity_filename:
            input filename, one entity per line
        category_filename:
            filename containing permissible categories, one per line
            (earlier preferable to later)
        results_filename:
            output filename for CSV results
        entity_synonyms_filename:
            Name of CSV file (with optional # comments) containing synonyms
            in the format ``entity_from, entity_to``.
        category_synonyms_filename:
            Name of CSV file (with optional # comments) containing synonyms
            in the format ``category_from, category_to``.
        manual_categories_filename:
            Name of CSV file (with optional # comments) containing manual
            categorizations in the format ``entity, category``.
        relationships:
            list of valid relationship types defining ancestry, e.g. "has_role"
        output_dialect:
            CSV output dialect
        headers:
            add CSV headers?
    """
    relationships = relationships or DEFAULT_ANCESTOR_RELATIONSHIPS
    log.info(f"Using ancestor relationships {relationships!r}")
    log.info(f"Reading categories from {category_filename}")
    categories = get_lines_without_comments(category_filename)
    entity_synonyms = _read_optional_dict(
        entity_synonyms_filename, "entity synonyms"
    )
    category_synonyms = _read_optional_dict(
        category_synonyms_filename, "category synonyms"
    )
    manual_categories = _read_optional_dict(
        manual_categories_filename, "manual categories"
    )
    log.info(f"Writing to {results_filename!r}")
    entities_seen = set()  # type: Set[str]
    # newline="" is what the csv module requires of files it writes to;
    # without it, the writer's own line endings can be translated again by
    # the file object (e.g. giving blank rows on Windows).
    with open(results_filename, "w", newline="") as outfile:
        writer = csv.writer(outfile, dialect=output_dialect)
        if headers:
            writer.writerow(["entity", "category"])
        log.info(f"Reading entities from {entity_filename}")
        for entity_name in gen_lines_without_comments(entity_filename):
            # De-duplicate case-insensitively, keeping the first occurrence.
            entity_name_lower = entity_name.lower()
            if entity_name_lower in entities_seen:
                log.warning(f"Ignoring duplicate: {entity_name!r}")
                continue
            entities_seen.add(entity_name_lower)
            category = (
                get_category(
                    entity_name=entity_name,
                    categories=categories,
                    entity_synonyms=entity_synonyms,
                    category_synonyms=category_synonyms,
                    manual_categories=manual_categories,
                    relationships=relationships,
                )
                or ""  # "no category" is written as an empty CSV cell
            )
            if category:
                log.debug(f"{entity_name} → {category}")
            else:
                log.error(f"No category found for {entity_name!r}")
            writer.writerow([entity_name, category])
# =============================================================================
# Main
# =============================================================================
def main() -> None:
    """
    Command-line entry point.

    Builds an argument parser with one subcommand per task (``test``,
    ``search``, ``describe``, ``ancestors``, ``categorize``), parses the
    command line, sets up logging and the ChEBI download cache path, and then
    runs the function registered for the chosen subcommand.
    """
    # -------------------------------------------------------------------------
    # Top-level parser and global options
    # -------------------------------------------------------------------------
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--cachepath",
        type=str,
        default=DEFAULT_CACHE_PATH,
        help="Cache path for ChEBI files",
    )
    parser.add_argument("--verbose", action="store_true", help="Be verbose")
    subparsers = parser.add_subparsers(
        title="subcommands", description="Valid subcommands", dest="command"
    )
    subparsers.required = True

    # -------------------------------------------------------------------------
    # Helpers for arguments shared between subcommands
    # -------------------------------------------------------------------------
    def add_flag_pair(
        subparser: argparse.ArgumentParser,
        dest: str,
        on_flag: str,
        on_help: str,
        off_flag: str,
        off_help: str,
        default: bool,
    ) -> None:
        # A pair of opposite switches writing to one destination.
        subparser.add_argument(
            on_flag, dest=dest, action="store_true", help=on_help
        )
        subparser.add_argument(
            off_flag, dest=dest, action="store_false", help=off_help
        )
        subparser.set_defaults(**{dest: default})

    def add_exact(subparser: argparse.ArgumentParser) -> None:
        add_flag_pair(
            subparser,
            dest="exact_search",
            on_flag="--exact_search",
            on_help="Search using exact term",
            off_flag="--inexact_search",
            off_help="Search allowing inexact matches",
            default=DEFAULT_EXACT_SEARCH,
        )
        add_flag_pair(
            subparser,
            dest="exact_match",
            on_flag="--exact_match",
            on_help="Return results for exact term only",
            off_flag="--inexact_match",
            off_help="Return results allowing inexact matches",
            default=DEFAULT_EXACT_MATCH,
        )

    def add_entities(subparser: argparse.ArgumentParser) -> None:
        subparser.add_argument(
            "entity",
            type=str,
            nargs="+",
            help="Entity or entities to search for",
        )

    def add_relationships(subparser: argparse.ArgumentParser) -> None:
        subparser.add_argument(
            "--relationships",
            type=str,
            nargs="+",
            default=DEFAULT_ANCESTOR_RELATIONSHIPS,
            help="Relationship types that define an ancestor",
        )

    # -------------------------------------------------------------------------
    # Actions, one per subcommand; each receives the parsed arguments
    # -------------------------------------------------------------------------
    def run_test(_parsed: argparse.Namespace) -> None:
        testfunc1()

    def run_search(parsed: argparse.Namespace) -> None:
        search_and_list_multiple(
            search_terms=parsed.entity,
            exact_search=parsed.exact_search,
            exact_match=parsed.exact_match,
        )

    def run_describe(parsed: argparse.Namespace) -> None:
        search_and_describe_multiple(
            search_terms=parsed.entity,
            exact_search=parsed.exact_search,
            exact_match=parsed.exact_match,
        )

    def run_ancestors(parsed: argparse.Namespace) -> None:
        report_ancestors_multiple(
            entity_names=parsed.entity,
            relationships=parsed.relationships,
            max_generations=parsed.max_generations,
        )

    def run_categorize(parsed: argparse.Namespace) -> None:
        categorize_from_file(
            entity_filename=parsed.entities,
            results_filename=parsed.results,
            category_filename=parsed.categories,
            entity_synonyms_filename=parsed.entity_synonyms,
            category_synonyms_filename=parsed.category_synonyms,
            manual_categories_filename=parsed.manual_categories,
            relationships=parsed.relationships,
        )

    # -------------------------------------------------------------------------
    # Test
    # -------------------------------------------------------------------------
    parser_test = subparsers.add_parser("test", help="Run some simple tests")
    parser_test.set_defaults(func=run_test)

    # -------------------------------------------------------------------------
    # Search
    # -------------------------------------------------------------------------
    parser_search = subparsers.add_parser(
        "search", help="Search for an entity in the ChEBI database"
    )
    add_entities(parser_search)
    add_exact(parser_search)
    parser_search.set_defaults(func=run_search)

    # -------------------------------------------------------------------------
    # Describe
    # -------------------------------------------------------------------------
    parser_describe = subparsers.add_parser(
        "describe", help="Describe an entity/entities in the ChEBI database"
    )
    add_entities(parser_describe)
    add_exact(parser_describe)
    parser_describe.set_defaults(func=run_describe)

    # -------------------------------------------------------------------------
    # Ancestors
    # -------------------------------------------------------------------------
    parser_ancestors = subparsers.add_parser(
        "ancestors",
        help="Show ancestors of an entity/entities in the ChEBI database",
    )
    add_entities(parser_ancestors)
    add_relationships(parser_ancestors)
    parser_ancestors.add_argument(
        "--max_generations",
        type=int,
        default=None,
        help="Number of generations to search, or None for unlimited",
    )
    parser_ancestors.set_defaults(func=run_ancestors)

    # -------------------------------------------------------------------------
    # Categorize
    # -------------------------------------------------------------------------
    parser_categorize = subparsers.add_parser(
        "categorize", help="Categorize a list of drugs."
    )
    parser_categorize.add_argument(
        "--entities",
        type=str,
        required=True,
        help="Input file, one entity (e.g. drug) name per line.",
    )
    parser_categorize.add_argument(
        "--categories",
        type=str,
        required=True,
        help="Name of file containing categories, one per line "
        "(earlier categories preferred to later).",
    )
    parser_categorize.add_argument(
        "--entity_synonyms",
        type=str,
        default=None,
        help="Name of CSV file (with optional # comments) containing synonyms "
        "in the format 'entity_from, entity_to'",
    )
    parser_categorize.add_argument(
        "--category_synonyms",
        type=str,
        default=None,
        help="Name of CSV file (with optional # comments) containing synonyms "
        "in the format 'category_from, category_to'. The translation is "
        "applied to ChEBI categories before matching. For example you "
        "can map 'EC 3.1.1.7 (acetylcholinesterase) inhibitor' to "
        "'acetylcholinesterase inhibitor' and then use only "
        "'acetylcholinesterase inhibitor' in your category file.",
    )
    parser_categorize.add_argument(
        "--manual_categories",
        type=str,
        default=None,
        help="Name of CSV file (with optional # comments) containing manual "
        "categorizations in the format 'entity, category'",
    )
    parser_categorize.add_argument(
        "--results", type=str, required=True, help="Output CSV file."
    )
    add_relationships(parser_categorize)
    parser_categorize.set_defaults(func=run_categorize)

    # -------------------------------------------------------------------------
    # Parse and run
    # -------------------------------------------------------------------------
    parsed_args = parser.parse_args()

    # Logging
    loglevel = logging.DEBUG if parsed_args.verbose else logging.INFO
    main_only_quicksetup_rootlogger(level=loglevel)
    log.debug(f"ChEBI lookup from cardinal_pythonlib=={VERSION_STRING}")

    # Caching
    log.debug(f"Using cache path: {parsed_args.cachepath}")
    set_download_cache_path(parsed_args.cachepath)

    # Run the action registered by the chosen subcommand
    parsed_args.func(parsed_args)
# Run the command-line interface when this file is executed as a script
# (rather than imported as a module).
if __name__ == "__main__":
    main()