From 73e91f736384870eb4f3487a0ce9f7763686d868 Mon Sep 17 00:00:00 2001 From: alhendrickson Date: Wed, 24 Jun 2026 11:57:24 +0000 Subject: [PATCH 1/8] fix(medcat-trainer): Don't load addons for document annotation --- medcat-trainer/webapp/api/api/model_cache.py | 35 ++++++++++++++++++-- medcat-trainer/webapp/api/api/utils.py | 2 +- medcat-trainer/webapp/api/api/views.py | 4 +-- 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/medcat-trainer/webapp/api/api/model_cache.py b/medcat-trainer/webapp/api/api/model_cache.py index ac438ef09..7d64af2a7 100644 --- a/medcat-trainer/webapp/api/api/model_cache.py +++ b/medcat-trainer/webapp/api/api/model_cache.py @@ -1,6 +1,6 @@ import logging import os -from typing import Dict, Optional, Any +from typing import Collection, Dict, Optional, Any from pydantic import ValidationError from opentelemetry import trace @@ -22,6 +22,8 @@ VOCAB_MAP = {} CAT_MAP = {} +_FULL_ADDONS_ATTR = '_trainer_full_addons' + logger = logging.getLogger(__name__) tracer = trace.get_tracer("medcat-trainer") @@ -32,6 +34,26 @@ logger.warning("MAX_MEDCAT_MODELS is not an integer, using default value of 1") +def _remember_full_addons(cat: CAT) -> None: + if not hasattr(cat, _FULL_ADDONS_ATTR): + setattr(cat, _FULL_ADDONS_ATTR, list(cat._pipeline._addons)) + + +def _apply_addon_filter(cat: CAT, + addons: Optional[Collection[str]] = None) -> CAT: + """Return *cat* with pipeline addons filtered; full set is kept on the cache.""" + _remember_full_addons(cat) + full_addons = getattr(cat, _FULL_ADDONS_ATTR) + if addons is None: + cat._pipeline._addons = list(full_addons) + else: + allowed = set(addons) + cat._pipeline._addons = [ + addon for addon in full_addons if addon.addon_type in allowed + ] + return cat + + def _clear_models(cdb_map: Dict[str, CDB]=CDB_MAP, vocab_map: Dict[str, Vocab]=VOCAB_MAP, cat_map: Dict[str, CAT]=CAT_MAP): @@ -186,19 +208,26 @@ def get_medcat_from_model_pack_id(modelpack_id: int, cat_map: Dict[str, CAT]=CAT @tracer.start_as_current_span("get_medcat") def get_medcat(project, + addons: Optional[Collection[str]] = None, cdb_map: Dict[str, CDB]=CDB_MAP, vocab_map: Dict[str, Vocab]=VOCAB_MAP, cat_map: Dict[str, CAT]=CAT_MAP): + """Load (and cache) a MedCAT model for a project. + + The full model is always cached. When *addons* is set, only matching addon + types (e.g. ``'meta_cat'``, ``'rel_cat'``) are active on the returned CAT. + Pass an empty collection for NER+linking only. + """ cat = get_cached_medcat(project, cat_map) if cat is not None: trace.get_current_span().add_event("Loaded medcat from cache") - return cat + return _apply_addon_filter(cat, addons) try: if project.model_pack is None: cat = get_medcat_from_cdb_vocab(project, cdb_map, vocab_map, cat_map) else: cat = get_medcat_from_model_pack(project, cat_map) - return cat + return _apply_addon_filter(cat, addons) except AttributeError as err: raise Exception('Failure loading Project ConceptDB, Vocab or Model Pack. Are these set correctly?') from err diff --git a/medcat-trainer/webapp/api/api/utils.py b/medcat-trainer/webapp/api/api/utils.py index aff47c20f..1580336f8 100644 --- a/medcat-trainer/webapp/api/api/utils.py +++ b/medcat-trainer/webapp/api/api/utils.py @@ -454,7 +454,7 @@ def prep_docs(project_id: List[int], doc_ids: List[int], user_id: int): else: # Use local medcat model logger.info('Loading CAT object in bg process for project: %s', project.id) - cat = get_medcat(project=project) + cat = get_medcat(project=project, addons=[]) # Set CAT filters cat.config.components.linking.filters.cuis = cuis diff --git a/medcat-trainer/webapp/api/api/views.py b/medcat-trainer/webapp/api/api/views.py index 825397b72..f90a6787c 100644 --- a/medcat-trainer/webapp/api/api/views.py +++ b/medcat-trainer/webapp/api/api/views.py @@ -1,3 +1,4 @@ +import json import logging import os from smtplib import SMTPException @@ -45,7 +46,6 @@ logger = logging.getLogger(__name__) - # Get the basic version of MedCAT cat = None @@ -329,7 +329,7 @@ def prepare_documents(request): existing_annotations=anns) else: # Use local medcat model - cat = get_medcat(project=project) + cat = get_medcat(project=project, addons=[]) logger.info('loaded medcat model for project: %s', project.id) # Set CAT filters From 25cf120f2587809315cce8b75f0a05e0de82c48b Mon Sep 17 00:00:00 2001 From: alhendrickson Date: Wed, 24 Jun 2026 11:58:19 +0000 Subject: [PATCH 2/8] fix(medcat-trainer): Don't load addons for document annotation - cleanup --- medcat-trainer/webapp/api/api/views.py | 1 - 1 file changed, 1 deletion(-) diff --git a/medcat-trainer/webapp/api/api/views.py b/medcat-trainer/webapp/api/api/views.py index f90a6787c..85480c3ab 100644 --- a/medcat-trainer/webapp/api/api/views.py +++ b/medcat-trainer/webapp/api/api/views.py @@ -1,4 +1,3 @@ -import json import logging import os from smtplib import SMTPException From 33f538d1f5a531f59c4d7008e48978bdfe7e6c57 Mon Sep 17 00:00:00 2001 From: alhendrickson Date: Wed, 24 Jun 2026 12:00:42 +0000 Subject: [PATCH 3/8] fix(medcat-trainer): cleanup pydoc --- medcat-trainer/webapp/api/api/model_cache.py | 22 +++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/medcat-trainer/webapp/api/api/model_cache.py b/medcat-trainer/webapp/api/api/model_cache.py index 7d64af2a7..71aa1f9c5 100644 --- a/medcat-trainer/webapp/api/api/model_cache.py +++ b/medcat-trainer/webapp/api/api/model_cache.py @@ -212,11 +212,23 @@ def get_medcat(project, cdb_map: Dict[str, CDB]=CDB_MAP, vocab_map: Dict[str, Vocab]=VOCAB_MAP, cat_map: Dict[str, CAT]=CAT_MAP): - """Load (and cache) a MedCAT model for a project. - - The full model is always cached. When *addons* is set, only matching addon - types (e.g. ``'meta_cat'``, ``'rel_cat'``) are active on the returned CAT. - Pass an empty collection for NER+linking only. + """Load and cache a MedCAT model for a trainer project. + + Args: + project: ``ProjectAnnotateEntities`` to load the model for. + addons: Addon types to enable on the returned model, e.g. + ``['meta_cat']`` or ``['rel_cat']``. Pass an empty collection for + NER and linking only. Defaults to ``None`` (all addons enabled). + cdb_map: Module-level CDB cache. Defaults to ``CDB_MAP``. + vocab_map: Module-level vocab cache. Defaults to ``VOCAB_MAP``. + cat_map: Module-level CAT cache. Defaults to ``CAT_MAP``. + + Returns: + CAT: A cached MedCAT instance for the project. + + Raises: + Exception: If the project ConceptDB, vocab, or model pack is missing + or misconfigured. """ cat = get_cached_medcat(project, cat_map) if cat is not None: From 5520c233949fe9442848d303ad2b4298a2afe428 Mon Sep 17 00:00:00 2001 From: alhendrickson Date: Wed, 24 Jun 2026 12:13:31 +0000 Subject: [PATCH 4/8] fix(medcat-trainer): cleanup types --- medcat-trainer/webapp/api/api/model_cache.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/medcat-trainer/webapp/api/api/model_cache.py b/medcat-trainer/webapp/api/api/model_cache.py index 71aa1f9c5..d555fa16c 100644 --- a/medcat-trainer/webapp/api/api/model_cache.py +++ b/medcat-trainer/webapp/api/api/model_cache.py @@ -1,6 +1,6 @@ import logging import os -from typing import Collection, Dict, Optional, Any +from typing import Dict, Optional, Any from pydantic import ValidationError from opentelemetry import trace @@ -40,16 +40,16 @@ def _remember_full_addons(cat: CAT) -> None: def _apply_addon_filter(cat: CAT, - addons: Optional[Collection[str]] = None) -> CAT: + addons: Optional[list[str]] = None) -> CAT: """Return *cat* with pipeline addons filtered; full set is kept on the cache.""" _remember_full_addons(cat) full_addons = getattr(cat, _FULL_ADDONS_ATTR) if addons is None: cat._pipeline._addons = list(full_addons) else: - allowed = set(addons) + allowed_addons = set(addons) cat._pipeline._addons = [ - addon for addon in full_addons if addon.addon_type in allowed + addon for addon in full_addons if addon.addon_type in allowed_addons ] return cat @@ -208,7 +208,7 @@ def get_medcat_from_model_pack_id(modelpack_id: int, cat_map: Dict[str, CAT]=CAT @tracer.start_as_current_span("get_medcat") def get_medcat(project, - addons: Optional[Collection[str]] = None, + addons: Optional[list[str]] = None, cdb_map: Dict[str, CDB]=CDB_MAP, vocab_map: Dict[str, Vocab]=VOCAB_MAP, cat_map: Dict[str, CAT]=CAT_MAP): @@ -217,8 +217,8 @@ def get_medcat(project, Args: project: ``ProjectAnnotateEntities`` to load the model for. addons: Addon types to enable on the returned model, e.g. - ``['meta_cat']`` or ``['rel_cat']``. Pass an empty collection for - NER and linking only. Defaults to ``None`` (all addons enabled). + ``['meta_cat']`` or ``['rel_cat']``. Pass an empty list for NER and + linking only. Defaults to ``None`` (all addons enabled). cdb_map: Module-level CDB cache. Defaults to ``CDB_MAP``. vocab_map: Module-level vocab cache. Defaults to ``VOCAB_MAP``. cat_map: Module-level CAT cache. Defaults to ``CAT_MAP``. From dcfd27d27e9c1c2d2e6647de56792daa8752e6b6 Mon Sep 17 00:00:00 2001 From: Mart Ratas Date: Wed, 24 Jun 2026 13:43:01 +0100 Subject: [PATCH 5/8] Add resetting of configs --- medcat-trainer/webapp/api/api/model_cache.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/medcat-trainer/webapp/api/api/model_cache.py b/medcat-trainer/webapp/api/api/model_cache.py index d555fa16c..8ad0d3998 100644 --- a/medcat-trainer/webapp/api/api/model_cache.py +++ b/medcat-trainer/webapp/api/api/model_cache.py @@ -51,6 +51,9 @@ def _apply_addon_filter(cat: CAT, cat._pipeline._addons = [ addon for addon in full_addons if addon.addon_type in allowed_addons ] + cat.config.components.addons = [ + addon.config for addon in cat._pipeline._addons + ] return cat From e7107d334cde9de8f1c984d0fe7ac21df360b904 Mon Sep 17 00:00:00 2001 From: Mart Ratas Date: Wed, 24 Jun 2026 13:44:12 +0100 Subject: [PATCH 6/8] Add small comment on addon filtering in core lib --- medcat-trainer/webapp/api/api/model_cache.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/medcat-trainer/webapp/api/api/model_cache.py b/medcat-trainer/webapp/api/api/model_cache.py index 8ad0d3998..9753b04e8 100644 --- a/medcat-trainer/webapp/api/api/model_cache.py +++ b/medcat-trainer/webapp/api/api/model_cache.py @@ -236,12 +236,14 @@ def get_medcat(project, cat = get_cached_medcat(project, cat_map) if cat is not None: trace.get_current_span().add_event("Loaded medcat from cache") + # NOTE: addon filtering needs to be handled on the core lib side in the future return _apply_addon_filter(cat, addons) try: if project.model_pack is None: cat = get_medcat_from_cdb_vocab(project, cdb_map, vocab_map, cat_map) else: cat = get_medcat_from_model_pack(project, cat_map) + # NOTE: addon filtering needs to be handled on the core lib side in the future return _apply_addon_filter(cat, addons) except AttributeError as err: raise Exception('Failure loading Project ConceptDB, Vocab or Model Pack. Are these set correctly?') from err From 787b7cff0e31c839b092de5928eafd6693449273 Mon Sep 17 00:00:00 2001 From: Mart Ratas Date: Wed, 24 Jun 2026 13:46:08 +0100 Subject: [PATCH 7/8] Allow meta cats at model load --- medcat-trainer/webapp/api/api/utils.py | 2 +- medcat-trainer/webapp/api/api/views.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/medcat-trainer/webapp/api/api/utils.py b/medcat-trainer/webapp/api/api/utils.py index 1580336f8..20c236276 100644 --- a/medcat-trainer/webapp/api/api/utils.py +++ b/medcat-trainer/webapp/api/api/utils.py @@ -454,7 +454,7 @@ def prep_docs(project_id: List[int], doc_ids: List[int], user_id: int): else: # Use local medcat model logger.info('Loading CAT object in bg process for project: %s', project.id) - cat = get_medcat(project=project, addons=[]) + cat = get_medcat(project=project, addons=["meta_cat"]) # Set CAT filters cat.config.components.linking.filters.cuis = cuis diff --git a/medcat-trainer/webapp/api/api/views.py b/medcat-trainer/webapp/api/api/views.py index 85480c3ab..388f4a954 100644 --- a/medcat-trainer/webapp/api/api/views.py +++ b/medcat-trainer/webapp/api/api/views.py @@ -328,7 +328,7 @@ def prepare_documents(request): existing_annotations=anns) else: # Use local medcat model - cat = get_medcat(project=project, addons=[]) + cat = get_medcat(project=project, addons=["meta_cat"]) logger.info('loaded medcat model for project: %s', project.id) # Set CAT filters From 92d77a989584b091964a7595597bc367f8be9fa6 Mon Sep 17 00:00:00 2001 From: Mart Ratas Date: Wed, 24 Jun 2026 14:49:13 +0100 Subject: [PATCH 8/8] Allow unneeded addons to be garbage collected --- medcat-trainer/webapp/api/api/model_cache.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/medcat-trainer/webapp/api/api/model_cache.py b/medcat-trainer/webapp/api/api/model_cache.py index 9753b04e8..2073e58d9 100644 --- a/medcat-trainer/webapp/api/api/model_cache.py +++ b/medcat-trainer/webapp/api/api/model_cache.py @@ -22,8 +22,6 @@ VOCAB_MAP = {} CAT_MAP = {} -_FULL_ADDONS_ATTR = '_trainer_full_addons' - logger = logging.getLogger(__name__) tracer = trace.get_tracer("medcat-trainer") @@ -34,16 +32,11 @@ logger.warning("MAX_MEDCAT_MODELS is not an integer, using default value of 1") -def _remember_full_addons(cat: CAT) -> None: - if not hasattr(cat, _FULL_ADDONS_ATTR): - setattr(cat, _FULL_ADDONS_ATTR, list(cat._pipeline._addons)) - def _apply_addon_filter(cat: CAT, addons: Optional[list[str]] = None) -> CAT: """Return *cat* with pipeline addons filtered; full set is kept on the cache.""" - _remember_full_addons(cat) - full_addons = getattr(cat, _FULL_ADDONS_ATTR) + full_addons = cat.pipe._addons if addons is None: cat._pipeline._addons = list(full_addons) else: