
# Copyright 2020 The Forte Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pylint: disable=non-parent-init-called
# pylint: disable=super-init-not-called
"""
A magic model that registers the parameters of a PyTorch ``nn.Module``
and performs memory-efficient parameter updates locally.
"""

import copy
from typing import Dict
from forte.utils import create_import_error_msg

try:
    import torch
    from torch import nn
except ImportError as e:
    raise ImportError(
        create_import_error_msg("torch", "models", "magic models")
    ) from e

try:
    import texar.torch as tx
except ImportError as e:
    raise ImportError(
        create_import_error_msg("texar-pytorch", "models", "magic models")
    ) from e

__all__ = ["MetaModule", "TexarBertMetaModule"]


class MetaModule(nn.ModuleList):
    # pylint: disable=line-too-long
    r"""A class extending :class:`torch.nn.ModuleList` that registers the
    parameters of a :class:`torch.nn.Module` and performs memory-efficient
    parameter updates locally.

    This code is adapted from:
    https://github.com/tanyuqian/learning-data-manipulation/blob/master/magic_module.py

    It implements the calculation:
    :math:`L(\theta - \nabla_{\theta} L_{train}(\theta, \phi))`.

    Args:
        module: A :class:`torch.nn.Module`.

    This class can be used as-is for a simple input module whose sub-modules
    do not rely on helper methods or attributes from outside this class to
    perform their :meth:`forward`. Otherwise, since :meth:`forward` here
    delegates to the input module's :meth:`forward`, this class must also
    extend the classes that define the helper methods the sub-modules need,
    so that it inherits those methods and the sub-modules' :meth:`forward`
    runs correctly. For example, if the input module is
    :class:`~texar.torch.modules.BERTClassifier`, then
    :meth:`_get_noise_shape`, :meth:`_split_heads` and :meth:`_combine_heads`
    from its sub-modules (e.g. :class:`~texar.torch.modules.BERTEncoder`)
    need to be exposed in this class to perform their :meth:`forward`.

    Please refer to :class:`TexarBertMetaModule` for instructions on
    creating a subclass of this one for a specific input module.
    """

    def __init__(self, module: nn.Module):
        nn.Module.__init__(self)
        self._type = type(module)

        # Register each parameter twice: the original tensor under
        # "_origin_" + name, and its data as a buffer under the original
        # name, so that `update_params` can overwrite the buffer without
        # touching the original parameter.
        for key, value in module._parameters.items():
            if value is not None:
                self.register_parameter("_origin_" + key, value)
                self.register_buffer(key, value.data)
            else:
                self.register_buffer(key, None)

        for key, value in module._buffers.items():
            self.register_buffer(key, copy.deepcopy(value))

        # Recursively create MetaModule.
        for key, value in module._modules.items():
            # type(self) is the real class object: it can be
            # MetaModule(value), or a subclass such as
            # TexarBertMetaModule(value).
            self.add_module(key, type(self)(value))

        # Copy the remaining plain attributes so that the borrowed
        # `forward` finds everything it expects on `self`.
        for key, value in module.__dict__.items():
            if (
                key not in self.__dict__
                and key not in self._buffers
                and key not in self._modules
            ):
                self.__setattr__(key, value)

    def forward(self, *args, **kwargs):
        return self._type.forward(self, *args, **kwargs)

    def update_params(self, deltas: Dict[str, torch.Tensor]):
        r"""Applies `deltas` to the buffered parameter copies, recursing
        into sub-modules for dotted keys (e.g. ``"encoder.weight"``)."""
        sub_params: Dict[str, Dict[str, torch.Tensor]] = {}
        for key, delta in deltas.items():
            if "." not in key:
                self._buffers[key] = self._buffers[key] + delta
            else:
                attr = key.split(".")[0]
                if attr not in sub_params:
                    sub_params[attr] = {}
                sub_params[attr][".".join(key.split(".")[1:])] = delta
        for key, value in sub_params.items():
            self._modules[key].update_params(value)
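
# A minimal usage sketch of the meta-update
# :math:`L(\theta - \nabla_{\theta} L_{train}(\theta, \phi))`. The names
# `model`, `loss_fn`, `train_x`, `train_y`, `val_x`, `val_y` and `lr` are
# hypothetical, and a plain SGD delta stands in for whatever
# optimizer-specific delta the caller computes:
#
#     meta_model = MetaModule(model)
#
#     # Training loss through the *original* module, so that gradients
#     # w.r.t. its parameters exist in the graph.
#     train_loss = loss_fn(model(train_x), train_y)
#     names, params = zip(*model.named_parameters())
#     grads = torch.autograd.grad(train_loss, params, create_graph=True)
#
#     # Keys match the buffer names inside MetaModule, e.g. "encoder.weight".
#     deltas = {name: -lr * g for name, g in zip(names, grads)}
#     meta_model.update_params(deltas)
#
#     # Evaluating through the MetaModule stays differentiable through the
#     # deltas, while `model`'s own parameters are left untouched.
#     val_loss = loss_fn(meta_model(val_x), val_y)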

class TexarBertMetaModule(
    MetaModule, tx.modules.EmbedderBase, tx.modules.MultiheadAttentionEncoder
):
    r"""A subclass that extends :class:`MetaModule` to do parameter updates
    locally for texar-pytorch BERT-related modules, e.g.
    :class:`~texar.torch.modules.BERTClassifier`.

    Please refer to its base class :class:`MetaModule` for more details.

    Args:
        module: A :class:`torch.nn.Module`.

    This class extends :class:`~texar.torch.modules.EmbedderBase` and
    :class:`~texar.torch.modules.MultiheadAttentionEncoder`, so that it
    inherits the methods needed to perform :meth:`forward` of the modules
    that use them, e.g. :class:`~texar.torch.modules.BERTEncoder`.

    A note on the order of the base classes: `MetaModule` should come first,
    so that :meth:`forward` resolves to :meth:`MetaModule.forward` rather
    than the :meth:`forward` of the other base classes, such as
    :func:`texar.torch.modules.MultiheadAttentionEncoder.forward`.
    If `MetaModule` is not first, then a :meth:`forward` must be defined in
    this class so that the correct one is called. Example:

    .. code-block:: python

        def forward(self, *args, **kwargs):
            return MetaModule.forward(self, *args, **kwargs)
    """

    def __init__(self, module: nn.Module):
        MetaModule.__init__(self, module)
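
# A minimal sketch of wrapping a texar-pytorch BERTClassifier so that the
# inherited EmbedderBase / MultiheadAttentionEncoder helpers are available
# to its sub-modules' forward passes. It assumes the classifier's default
# pre-trained BERT; `input_ids` and `input_length` are hypothetical tensors:
#
#     classifier = tx.modules.BERTClassifier()
#     meta_classifier = TexarBertMetaModule(classifier)
#     logits, preds = meta_classifier(input_ids, input_length)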