Source code for forte.data.index

# Copyright 2019 The Forte Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
from collections import defaultdict
from typing import (
    DefaultDict,
    Dict,
    List,
    Set,
    Type,
    Hashable,
    Generic,
    Iterable,
    Tuple,
    KeysView,
)

from forte.common.exception import PackIndexError
from forte.data.ontology.core import GroupType, LinkType, EntryType

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Names exported by ``from <this module> import *``.
__all__ = ["BaseIndex"]


class BaseIndex(Generic[EntryType]):
    r"""A set of indexes used in :class:`~forte.data.base_pack.BasePack`:

    #. :attr:`entry_index`, the index from each ``tid`` to the corresponding
       entry
    #. :attr:`type_index`, the index from each type to the entries of
       that type
    #. :attr:`link_index`, the index from child
       (:attr:`link_index["child_index"]`) and parent
       (:attr:`link_index["parent_index"]`) nodes to links
    #. :attr:`group_index`, the index from group members to groups.

    The entry and type indexes are always maintained. The group and link
    indexes are guarded by on/off switches, which :meth:`remove_entry`
    turns off.
    """

    def __init__(self) -> None:
        # List of basic indexes (always on).
        # Mapping from entry's tid to the entries.
        self._entry_index: Dict[int, EntryType] = {}
        # Mapping from entry's type to entries' id.
        self._type_index: DefaultDict[Type[EntryType], Set[int]] = defaultdict(
            set
        )

        # Indices below will be built when looked up:
        # A cache map to store all entries of a certain type including the
        # sub-types. This index will be populated on demand when
        # query_by_type_subtype is called.
        self._subtype_index: Dict[Type[EntryType], Set[int]] = {}

        self._group_index: DefaultDict[Hashable, Set[int]] = defaultdict(set)
        # Keyed by "parent_index" / "child_index" (see `add_link_parent` and
        # `add_link_child`). This class never creates those sub-indexes.
        self._link_index: Dict[str, DefaultDict[Hashable, Set[int]]] = {}

        # Indexing switches.
        self._group_index_switch = False
        self._link_index_switch = False

    def update_basic_index(self, entries: List[EntryType]) -> None:
        r"""Build or update the basic indexes, including

        (1) :attr:`entry_index`,
        the index from each ``tid`` to the corresponding entry;

        (2) :attr:`type_index`, the index from each type to the entries of
        that type.

        Adding entries also invalidates the cached sub-type lookups used by
        :meth:`query_by_type_subtype`.

        Args:
            entries (list): a list of entries to be added into the basic index.
        """
        for entry in entries:
            self._entry_index[entry.tid] = entry
            self._type_index[type(entry)].add(entry.tid)

        if entries:
            # Disable sub type index since new items are added and this will
            # be rebuilt in next query (`query_by_type_subtype`). The whole
            # cache is dropped, not just the slot of the entry's own type:
            # the cached result of every ancestor type is stale as well.
            self._subtype_index.clear()

    def get_entry(self, tid: int) -> EntryType:
        r"""Return the entry registered under ``tid``.

        Raises:
            KeyError: if no entry with this ``tid`` is indexed.
        """
        return self._entry_index[tid]

    def indexed_types(self) -> KeysView[Type]:
        r"""Return a view over the entry types present in the type index."""
        return self._type_index.keys()

    def query_by_type(self, t: Type[EntryType]) -> Set[int]:
        r"""Return the ids of the entries whose type is exactly ``t``.

        Sub-types are not included; use :meth:`query_by_type_subtype` for
        that. The type index is a ``defaultdict``, so querying a type that has
        not been seen returns an empty set and registers that type as a key.
        """
        return self._type_index[t]

    def query_by_type_subtype(self, t: Type[EntryType]) -> Set[int]:
        r"""Look up the entry indices that are instances of ``t``, including
        children classes of ``t``.

        .. note::
            all the known types to this data pack will be scanned to find
            all sub-types. This method will try to cache the sub-type
            information after the first call, but the cached information
            could be invalidated by other operations (such as adding new
            items to the data pack).

        Args:
            t: The type of the entry you are looking for.

        Returns:
            A set of entry ids. The entries are instances of ``t`` (and
            also includes instances of the subclasses of ``t``).
        """
        cached = self._subtype_index.get(t)
        if cached is not None:
            return cached

        matched: Set[int] = set()
        for indexed_type, tids in self.iter_type_index():
            if issubclass(indexed_type, t):
                matched.update(tids)

        self._subtype_index[t] = matched
        return matched

    def iter_type_index(self) -> Iterable[Tuple[Type, Set[int]]]:
        r"""Iterate over ``(type, entry ids)`` pairs of the type index."""
        yield from self._type_index.items()

    def remove_entry(self, entry: EntryType) -> None:
        r"""Remove ``entry`` from the basic indexes.

        The group and link index switches are turned off afterwards, and the
        cached sub-type lookups are dropped so that
        :meth:`query_by_type_subtype` cannot return the removed ``tid``.

        Args:
            entry: the entry to be removed.

        Raises:
            KeyError: if the entry is not present in the index.
        """
        self._entry_index.pop(entry.tid)
        self._type_index[type(entry)].remove(entry.tid)
        # Cached sub-type sets may still contain the removed tid.
        self._subtype_index.clear()
        # Disable the indexes that are built on demand.
        self.turn_group_index_switch(on=False)
        self.turn_link_index_switch(on=False)

    @property
    def link_index_on(self) -> bool:
        r"""Whether the link index switch is currently on."""
        return self._link_index_switch

    def turn_link_index_switch(self, on: bool) -> None:
        r"""Set the link index switch to ``on``."""
        self._link_index_switch = on

    @property
    def group_index_on(self) -> bool:
        r"""Whether the group index switch is currently on."""
        return self._group_index_switch

    def turn_group_index_switch(self, on: bool) -> None:
        r"""Set the group index switch to ``on``."""
        self._group_index_switch = on

    def build_group_index(self, groups: List[GroupType]) -> None:
        r"""Build :attr:`group_index`, the index from group members to groups.

        Any previously built group index is discarded and the group index
        switch is turned on.

        Args:
            groups: a list of groups used to populate the fresh index.

        Returns:
            None
        """
        self.turn_group_index_switch(on=True)
        self._group_index = defaultdict(set)
        self.update_group_index(groups)

    def group_index(self, tid: int) -> Set[int]:
        r"""Look up the group_index with key ``tid``. If the index is not built,
        this will raise a :class:`~forte.common.exception.PackIndexError`.

        The underlying store is a ``defaultdict``, so a key without any group
        yields an empty set.

        Raises:
            PackIndexError: if the group index switch is off.
        """
        if not self.group_index_on:
            raise PackIndexError("Group index for pack not build")
        return self._group_index[tid]

    def update_group_index(self, groups: List[GroupType]) -> None:
        r"""Build or update :attr:`group_index`, the index from group members to
        groups.

        Args:
            groups: a list of groups to be added into the index.

        Raises:
            PackIndexError: if the group index switch is off, i.e. the index
                has not been built.
        """
        logger.debug("Updating group index")

        # Test the switch rather than the truthiness of the mapping: a group
        # index that was just (re)built is an empty, hence falsy, defaultdict.
        if not self.group_index_on:
            raise PackIndexError("Group index has not been built.")

        for group in groups:
            for member in group.members:  # type: ignore
                self._group_index[member].add(group.tid)

    def add_link_parent(self, parent: EntryType, link: LinkType) -> None:
        r"""Record ``link`` under the index key of its ``parent``.

        Raises:
            KeyError: if the ``"parent_index"`` sub-index does not exist.
        """
        self._link_index["parent_index"][parent.index_key].add(link.tid)

    def add_link_child(self, child: EntryType, link: LinkType) -> None:
        r"""Record ``link`` under the index key of its ``child``.

        Raises:
            KeyError: if the ``"child_index"`` sub-index does not exist.
        """
        self._link_index["child_index"][child.index_key].add(link.tid)

    def add_group_member(self, member: EntryType, group: GroupType) -> None:
        r"""Record ``group`` under the index key of ``member``."""
        self._group_index[member.index_key].add(group.tid)