Source code for forte.models.srl.model

# Copyright 2019 The Forte Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ["LabeledSpanGraphNetwork"]
import math
from collections import defaultdict
from typing import Any, Dict, List, Tuple
from mypy_extensions import TypedDict
from forte.utils import create_import_error_msg
from forte.models.srl import model_utils as utils
from forte.models.srl.data import SRLSpan, Span


try:
    import torch
    from torch import nn
    from torch.nn import functional as F
except ImportError as e:
    raise ImportError(
        create_import_error_msg("torch", "models", "Texar model support")
    ) from e


try:
    import texar.torch as tx
except ImportError as e:
    raise ImportError(
        create_import_error_msg(
            "texar-pytorch", "models", "Texar model support"
        )
    ) from e


class LabeledSpanGraphNetwork(tx.ModuleBase):
    @property
    def output_size(self):
        """
        This module is supposed to be the last layer, so we will not return
        an informative ``output_size``.
        """
        return 0

    __torch_device__: torch.device

    def __init__(
        self, word_vocab: tx.data.Vocab, char_vocab: tx.data.Vocab, hparams=None
    ):
        super().__init__(hparams)

        # Word vocabulary & representation
        self.word_vocab = word_vocab
        self.word_embed = tx.modules.WordEmbedder(
            init_value=tx.data.Embedding(
                vocab=self.word_vocab.token_to_id_map_py,
                hparams={
                    "file": self._hparams.context_embeddings.path,
                    "dim": self._hparams.context_embeddings.size,
                    "read_fn": "load_glove",
                },
            ).word_vecs
        )
        self.head_embed = tx.modules.WordEmbedder(
            init_value=tx.data.Embedding(
                vocab=self.word_vocab.token_to_id_map_py,
                hparams={
                    "file": self._hparams.head_embeddings.path,
                    "dim": self._hparams.head_embeddings.size,
                    "read_fn": "load_glove",
                },
            ).word_vecs
        )
        self.span_length_embed = tx.modules.PositionEmbedder(
            position_size=self._hparams.max_arg_width,
            hparams={
                "dim": self._hparams.feature_size,
            },
        )

        # Character vocabulary & representation
        self.char_cnn = utils.CharCNN(
            char_vocab=char_vocab,
            hparams={
                "char_embed_size": self._hparams.char_embedding_size,
                "filter_widths": self._hparams.filter_widths,
                "filter_size": self._hparams.filter_size,
            },
        )
        self.embed_dropout = nn.Dropout(self._hparams.lexical_dropout_rate)

        # ELMo representation
        if self._hparams.elmo.path is not None:
            # pylint: disable=import-outside-toplevel
            from allennlp.modules.elmo import Elmo, batch_to_ids

            elmo_hparams = self._hparams.elmo
            self.elmo = Elmo(
                options_file=elmo_hparams.config,
                weight_file=elmo_hparams.path,
                num_output_representations=1,
            )
            self._elmo_char_ids_fn = batch_to_ids
        else:
            self.elmo = None

        # LSTM
        single_hidden_dim = self._hparams.contextualization_size
        lstm_input_dim = self.word_embed.dim + self.char_cnn.output_size
        if self.elmo is not None:
            lstm_input_dim += self._hparams.elmo.dim
        self.lstm = utils.CustomBiLSTM(
            hparams={
                "input_dim": lstm_input_dim,
                "hidden_dim": single_hidden_dim,
                "num_layers": self._hparams.contextualization_layers,
                "dropout": self._hparams.lstm_dropout_rate,
            }
        )
        hidden_dim = single_hidden_dim * 2

        self.label_vocab = {
            label: idx + 1  # reserve index 0 for null label
            for idx, label in enumerate(self._hparams.srl_labels)
        }
        self.label_inverse_vocab = {v: k for k, v in self.label_vocab.items()}

        self.head_attention = nn.Linear(hidden_dim, 1)

        word_input_dim = self.word_embed.dim + self.char_cnn.output_size
        mlp_num_layers = self._hparams.ffnn_depth
        mlp_hparams = {
            "input_sizes": [
                hidden_dim,  # concat'd state at start of span
                hidden_dim,  # concat'd state at end of span
                word_input_dim,
                self.span_length_embed.dim,
            ],
            "num_layers": mlp_num_layers,
            "hidden_size": [self._hparams.ffnn_size] * mlp_num_layers,
            "dropout_rate": self._hparams.dropout_rate,
        }
        self.argument_mlp = utils.ConcatInputMLP(
            hparams={
                **mlp_hparams,
                "output_size": 1,
                "activation": "ReLU",
            }
        )
        self.predicate_mlp = utils.ConcatInputMLP(
            hparams={
                **mlp_hparams,
                "input_sizes": [hidden_dim],
                "output_size": 1,
                "activation": "ReLU",
            }
        )
        self.span_label_mlp = utils.ConcatInputMLP(
            hparams={
                **mlp_hparams,
                "input_sizes": mlp_hparams["input_sizes"] + [hidden_dim],
                "output_size": len(self.label_vocab),
                "activation": "ReLU",
            }
        )
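
    # Architecture summary: GloVe word embeddings and a character CNN
    # (optionally concatenated with ELMo representations) feed a multi-layer
    # BiLSTM; argument, predicate, and label scores are then produced by
    # three `ConcatInputMLP` heads over the BiLSTM states, a soft-head
    # attention vector over span tokens, and a span-length embedding.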

    @staticmethod
    def default_hparams() -> Dict[str, Any]:
        return {
            "filter_widths": [3, 4, 5],
            "filter_size": 50,
            "char_embedding_size": 8,
            "context_embeddings": {
                "path": "embeddings/glove.840B.300d.05.filtered",
                "size": 300,
                "datasets": "txt",
                "lowercase": False,
            },
            "head_embeddings": {
                "path": "embeddings/glove_50_300_2.filtered",
                # "path": "embeddings/glove_50_300_2.txt",
                "size": 300,
                "datasets": "txt",
                "lowercase": False,
            },
            "elmo": {
                "path": None,
                "config": None,
                "dim": 256,
            },
            "contextualizer": "lstm",
            "contextualization_size": 200,
            "contextualization_layers": 3,
            "ffnn_size": 150,
            "ffnn_depth": 2,
            "feature_size": 20,
            "max_span_width": 30,
            "model_heads": True,
            "num_attention_heads": 1,
            "srl_labels": [
                # predicate
                "V",
                # simple propositions
                "A0", "A1", "A2", "A3", "A4", "A5", "AA",
                "AM", "AM-ADV", "AM-CAU", "AM-DIR", "AM-DIS", "AM-EXT",
                "AM-LOC", "AM-MNR", "AM-MOD", "AM-NEG", "AM-PNC", "AM-PRD",
                "AM-REC", "AM-TM", "AM-TMP",
                # continuations of discontinuous arguments
                "C-A0", "C-A1", "C-A2", "C-A3", "C-A4", "C-A5",
                "C-AM-ADV", "C-AM-CAU", "C-AM-DIR", "C-AM-DIS", "C-AM-EXT",
                "C-AM-LOC", "C-AM-MNR", "C-AM-NEG", "C-AM-PNC", "C-AM-TMP",
                "C-V",
                # arguments coreferenced with (referencing) another argument
                "R-A0", "R-A1", "R-A2", "R-A3", "R-A4", "R-AA",
                "R-AM-ADV", "R-AM-CAU", "R-AM-DIR", "R-AM-EXT", "R-AM-LOC",
                "R-AM-MNR", "R-AM-PNC", "R-AM-TMP",
            ],
            "max_arg_width": 30,
            "argument_ratio": 0.8,
            "predicate_ratio": 0.4,
            "lexical_dropout_rate": 0.5,
            "dropout_rate": 0.2,
            "lstm_dropout_rate": 0.4,
        }
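
    # A minimal construction sketch (the vocab objects and the overridden
    # values are hypothetical): as with other `texar.torch` modules, a
    # partial `hparams` dict passed to the constructor overrides the defaults
    # above.
    #
    #     model = LabeledSpanGraphNetwork(
    #         word_vocab=word_vocab,
    #         char_vocab=char_vocab,
    #         hparams={"max_span_width": 20, "ffnn_size": 100},
    #     )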

    @property
    def _device(self) -> torch.device:
        if not hasattr(self, "__torch_device__"):
            self.__torch_device__ = next(self.parameters()).device
        return self.__torch_device__

    def _create_span_indices(
        self, batch_size: int, max_len: int, max_span: int
    ) -> Tuple[torch.LongTensor, torch.LongTensor, torch.LongTensor]:
        start_ids = torch.arange(0, max_len).repeat_interleave(max_span)
        end_ids = start_ids + torch.arange(0, max_span).repeat(max_len)
        valid_mask = end_ids < max_len
        start_ids, end_ids = start_ids[valid_mask], end_ids[valid_mask]
        span_length = end_ids - start_ids
        start_ids = start_ids.expand(batch_size, *start_ids.size()).to(
            device=self._device
        )
        end_ids = end_ids.expand_as(start_ids).to(device=self._device)
        span_length = span_length.expand_as(start_ids).to(device=self._device)
        return start_ids, end_ids, span_length

    @staticmethod
    def _set_submatrix(mat: torch.Tensor, x: int, y: int, value: torch.Tensor):
        mat[x : (x + value.size(0)), y : (y + value.size(1))] = value

    def _create_softmax_mask(
        self, batch_size: int, max_len: int, max_span: int
    ) -> torch.ByteTensor:
        # 1 + 2 + ... + max_span + max_span + ... (total max_len terms)
        total_lines = (1 + min(max_span, max_len)) * min(max_span, max_len) // 2
        if max_len > max_span:
            total_lines += (max_len - max_span) * max_span
        lower_tri = torch.tril(
            torch.ones(max_span, max_span, dtype=torch.uint8)
        )
        mask = torch.zeros(total_lines, max_len, dtype=torch.uint8)
        line_count = 0
        for idx in range(max_len):
            if max_len - idx < max_span:
                cur_mask = lower_tri[: (max_len - idx), : (max_len - idx)]
            else:
                cur_mask = lower_tri
            self._set_submatrix(mask, line_count, idx, cur_mask)
            line_count += cur_mask.size(0)
        mask = mask.expand(batch_size, total_lines, max_len)
        return mask.to(device=self._device)

    def _filter_labels(
        self,
        start_ids: torch.LongTensor,
        end_ids: torch.LongTensor,
        predicates: torch.LongTensor,
        srls: List[List[SRLSpan]],
    ) -> torch.LongTensor:
        batch_size, num_spans = start_ids.size()
        num_predicates = predicates.size(1)
        device = start_ids.device
        start_ids = start_ids.cpu().numpy()
        end_ids = end_ids.cpu().numpy()
        predicates = predicates.cpu().numpy()
        batch_predicates = [
            {pred: idx for idx, pred in enumerate(preds)}
            for preds in predicates
        ]
        batch_spans = [
            {(l, r): idx for idx, (l, r) in enumerate(zip(starts, ends))}
            for starts, ends in zip(start_ids, end_ids)
        ]
        gold_labels = torch.zeros(
            batch_size, num_predicates * num_spans, dtype=torch.long
        )
        for b_idx in range(batch_size):
            for srl in srls[b_idx]:
                span_idx = batch_spans[b_idx].get((srl.start, srl.end), None)
                predicate_idx = batch_predicates[b_idx].get(
                    srl.predicate, None
                )
                if span_idx is not None and predicate_idx is not None:
                    label_idx = predicate_idx * num_spans + span_idx
                    gold_labels[b_idx, label_idx] = self.label_vocab[srl.label]
        gold_labels = gold_labels.to(device=device)
        return gold_labels

    def _compute_soft_head_attention_brute(
        self,
        start_ids: torch.LongTensor,
        end_ids: torch.LongTensor,
        sent_lengths: torch.LongTensor,
        states: torch.Tensor,
        word_inputs: torch.Tensor,
    ) -> Tuple[torch.Tensor, torch.LongTensor]:
        device = start_ids.device
        batch_size, max_len = states.size()[:2]
        num_spans = start_ids.size(1)
        max_span_width = self._hparams.max_span_width
        batch_offset = torch.arange(batch_size, device=device) * max_len
        span_indices = torch.arange(max_span_width, device=device)
        # span_indices: (batch_size, num_spans, max_span_width)
        span_indices = (
            span_indices.expand(batch_size, num_spans, -1)
            + start_ids.unsqueeze(-1)
            + batch_offset.view(-1, 1, 1)
        )
        # valid_spans: (batch_size, num_spans)
        valid_spans = end_ids < sent_lengths.unsqueeze(-1)
        # valid_spans_idx: (total_spans)
        valid_spans_idx = valid_spans.view(-1).nonzero().view(-1)
        # flat_span_indices: (total_spans, max_span_width)
        flat_span_indices = torch.index_select(
            span_indices.view(-1, max_span_width), dim=0, index=valid_spans_idx
        )
        # flat_sent_lengths: (total_spans)
        flat_sent_lengths = torch.index_select(
            (
                torch.min(end_ids + 1, sent_lengths.unsqueeze(-1))
                + batch_offset.unsqueeze(-1)
            ).view(-1),
            dim=0,
            index=valid_spans_idx,
        )
        # flat_mask: (total_spans, max_span_width)
        flat_mask = flat_span_indices < flat_sent_lengths.unsqueeze(-1)
        flat_span_indices *= flat_mask.type_as(flat_span_indices)
        # span_word_inputs: (total_spans, max_span_width, word_input_dim)
        span_word_inputs = torch.index_select(
            word_inputs.view(-1, word_inputs.size(-1)),
            dim=0,
            index=flat_span_indices.view(-1),
        ).view(*flat_span_indices.size(), -1)
        # logits: (batch_size, max_len)
        logits = self.head_attention(states).squeeze(-1)
        # flat_span_logits: (total_spans, max_span_width)
        flat_span_logits = torch.index_select(
            logits.view(-1), dim=0, index=flat_span_indices.view(-1)
        ).view(flat_span_indices.size())
        masked_span_logits = flat_span_logits - 1e10 * (~flat_mask).type_as(
            flat_span_logits
        )
        weights = torch.softmax(masked_span_logits, dim=-1)
        # weighted_inputs: (total_spans, max_span_width, word_input_dim)
        weighted_inputs = span_word_inputs * weights.unsqueeze(-1)
        # soft_head: (total_spans, word_input_dim)
        soft_head = torch.sum(weighted_inputs, dim=1)
        # indices: (batch_size, num_spans)
        indices = torch.cumsum(valid_spans.view(-1).type(torch.long), dim=0) - 1
        indices = torch.clamp_min(indices, 0).view_as(valid_spans)
        return soft_head, indices
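
    # `_compute_soft_head_attention_brute` scores every token with
    # `self.head_attention`, softmaxes the scores within each candidate span
    # (positions past the span or sentence end are masked with a large
    # negative constant), and returns the attention-weighted average of the
    # head/word embeddings as each span's "soft head" vector, together with
    # an index tensor mapping every (batch, span) pair to its row in that
    # flattened cache.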

    class ReturnType(TypedDict):
        loss: torch.Tensor
        total_scores: torch.Tensor
        start_ids: torch.LongTensor
        end_ids: torch.LongTensor
        predicates: torch.LongTensor

    def _arange(self, *args, **kwargs):
        return torch.arange(*args, device=self._device, **kwargs)

    def forward(self, inputs: tx.data.Batch) -> ReturnType:
        # Compute embeddings and recurrent states.
        char_embed = self.char_cnn(inputs.text)
        with torch.no_grad():
            # A workaround for freezing embeddings.
            word_embed = self.word_embed(inputs.text_ids)
            head_embed = self.head_embed(inputs.text_ids)
        context_embeds = [word_embed, char_embed]
        head_embeds = [head_embed, char_embed]
        if self.elmo is not None:
            char_ids = self._elmo_char_ids_fn(inputs.text).to(self._device)
            elmo_embed = self.elmo(char_ids)["elmo_representations"][0]
            context_embeds.append(elmo_embed)
        # *word_inputs: (batch_size, max_len, word_input_dim)
        lstm_word_inputs = self.embed_dropout(
            torch.cat(context_embeds, dim=-1)
        )
        word_inputs = self.embed_dropout(torch.cat(head_embeds, dim=-1))
        # states: (batch_size, max_len, hidden_dim)
        states = self.lstm(lstm_word_inputs, inputs.length)

        # Create span indices.
        batch_size, max_len = inputs.text_ids.size()
        max_span = self._hparams.max_span_width
        # *_ids: (batch_size, max_num_spans)
        # max_num_spans ~= max_len * max_span
        start_ids, end_ids, span_length = self._create_span_indices(
            batch_size, max_len, max_span
        )

        # Create soft head representation weights.
        # head_attn_cache, head_attn_index = self._compute_soft_head_attention(
        (
            head_attn_cache,
            head_attn_index,
        ) = self._compute_soft_head_attention_brute(
            start_ids, end_ids, inputs.length, states, word_inputs
        )

        # Compute argument & predicate scores.
        span_length_embed = self.embed_dropout(
            self.span_length_embed.embedding
        )
        cache_inputs = [states, states, head_attn_cache, span_length_embed]
        pred_indices = self._arange(max_len).expand(batch_size, -1)
        with self.argument_mlp.cache_results(
            cache_inputs
        ), self.predicate_mlp.cache_results([states]):
            # arg_scores: (batch_size, max_num_spans)
            arg_scores = self.argument_mlp(
                [start_ids, end_ids, head_attn_index, span_length]
            ).squeeze(-1)
            # pred_scores: (batch_size, max_len)
            pred_scores = self.predicate_mlp([pred_indices]).squeeze(-1)

        # Beam pruning of arguments & predicates.
        # topk_*: (batch_size, max_arguments)
        max_arguments = math.ceil(self._hparams.argument_ratio * max_len)
        num_arguments = torch.ceil(
            self._hparams.argument_ratio * inputs.length.float()
        ).long()
        topk_arg_scores, topk_arg_indices = torch.topk(
            arg_scores, k=max_arguments, dim=1, sorted=True
        )
        topk_start_ids, topk_end_ids, topk_attn_index = utils.batch_gather(
            [start_ids, end_ids, head_attn_index], index=topk_arg_indices
        )
        topk_span_length = topk_end_ids - topk_start_ids
        # topk_pred_*: (batch_size, max_predicates)
        max_predicates = math.ceil(self._hparams.predicate_ratio * max_len)
        num_predicates = torch.ceil(
            self._hparams.predicate_ratio * inputs.length.float()
        ).long()
        topk_pred_scores, topk_pred_indices = torch.topk(
            pred_scores, k=max_predicates, dim=1, sorted=True
        )

        # Compute label scores for pruned argument-predicate pairs.
        with self.span_label_mlp.cache_results(cache_inputs + [states]):
            # label_scores:
            # (batch_size, max_predicates * max_arguments, num_labels)
            label_scores = self.span_label_mlp(
                [
                    topk_start_ids.repeat(1, max_predicates),
                    topk_end_ids.repeat(1, max_predicates),
                    tx.utils.map_structure(
                        lambda x: x.repeat(1, max_predicates)
                        if isinstance(x, torch.Tensor)
                        else x,
                        topk_attn_index,
                    ),
                    topk_span_length.repeat(1, max_predicates),
                    topk_pred_indices.repeat_interleave(max_arguments, dim=1),
                ]
            )

        # Compute log-probabilities.
        total_scores = (
            topk_arg_scores.repeat(1, max_predicates).unsqueeze(-1)
            + topk_pred_scores.repeat_interleave(
                max_arguments, dim=1
            ).unsqueeze(-1)
            + label_scores
        )
        total_scores = torch.cat(
            [
                total_scores.new_zeros(*total_scores.size()[:-1], 1),
                total_scores,
            ],
            dim=-1,
        )
        gold_labels = self._filter_labels(
            topk_start_ids, topk_end_ids, topk_pred_indices, inputs.srl
        )

        # Compute masked loss.
        # unmasked_loss: (batch_size, max_predicates * max_arguments)
        unmasked_loss = F.cross_entropy(
            total_scores.view(-1, total_scores.size(-1)),
            gold_labels.view(-1),
            reduction="none",
        )
        # pred_*_mask: (batch_size, max_predicates)
        pred_index_mask = topk_pred_indices < inputs.length.unsqueeze(-1)
        pred_topk_mask = self._arange(max_predicates).unsqueeze(
            0
        ) < num_predicates.unsqueeze(1)
        # arg_*_mask: (batch_size, max_arguments)
        arg_index_mask = topk_end_ids < inputs.length.unsqueeze(-1)
        arg_topk_mask = self._arange(max_arguments).unsqueeze(
            0
        ) < num_arguments.unsqueeze(1)
        loss_mask = (
            (arg_index_mask & arg_topk_mask).unsqueeze(1)
            & (pred_index_mask & pred_topk_mask).unsqueeze(2)
        ).view(-1)
        loss = torch.sum(unmasked_loss * loss_mask.type_as(unmasked_loss))
        # loss = loss / batch_size

        return {
            "loss": loss,
            "total_scores": total_scores,
            "start_ids": topk_start_ids,
            "end_ids": topk_end_ids,
            "predicates": topk_pred_indices,
        }
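
    # A minimal training-step sketch (the optimizer and batch objects are
    # hypothetical): `forward` returns a summed, masked cross-entropy loss,
    # so one step could look like
    #
    #     optimizer.zero_grad()
    #     loss = model(batch)["loss"]
    #     loss.backward()
    #     optimizer.step()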

    _CORE_ARGS = {
        f"{prefix}{arg}": 1 << idx
        for prefix in ["A", "ARG"]
        for idx, arg in enumerate("012345A")
    }

    def _dp_decode(
        self,
        max_len: int,
        pred_idx: int,
        start_ids: List[int],
        end_ids: List[int],
        argmax_labels: List[int],
        label_scores: List[float],
        enforce_constraint: bool = False,
    ) -> List[Span]:
        # Map positions to list of span indices for quick lookup during DP.
        spans_ending_at: Dict[int, List[int]] = defaultdict(list)
        for idx in range(  # pylint: disable=consider-using-enumerate
            len(end_ids)
        ):
            if argmax_labels[idx] == 0:  # ignore null spans
                continue
            if start_ids[idx] <= pred_idx <= end_ids[idx]:
                # Skip spans overlapping with the predicate.
                continue
            if end_ids[idx] >= max_len:
                # Skip invalid spans longer than the sentence.
                continue
            spans_ending_at[end_ids[idx]].append(idx)
        if all(len(spans) == 0 for spans in spans_ending_at.values()):
            return []  # no spans at all, just return

        if enforce_constraint:
            label_states = [
                self._CORE_ARGS.get(self.label_inverse_vocab[label], -1)
                if label != 0
                else -1
                for label in argmax_labels
            ]
        else:
            # ignore constraints
            label_states = [-1] * len(argmax_labels)

        # Perform DP.
        # Each state is a tuple (time, core_args), where `core_args` is the
        # set of core arguments (ARGA, ARG0 to ARG5) previously selected,
        # represented in binary (so {ARG0, ARG2, ARG5} would be
        # 2^0 + 2^2 + 2^5 = 37).
        max_scores = [{0: 0.0}]
        # only record selected spans
        best_span_indices: List[Dict[int, int]] = [{}]
        for idx in range(max_len):
            cur_scores = max_scores[-1].copy()
            cur_span_idx = {}
            for span_idx in spans_ending_at[idx]:
                label_state = label_states[span_idx]
                prev_states = max_scores[start_ids[span_idx]]
                for state, prev_score in prev_states.items():
                    if label_state != -1 and (label_state & state != 0):
                        # A core argument of this type has already been
                        # selected.
                        continue
                    score = prev_score + label_scores[span_idx]
                    new_state = state | label_state
                    if score > cur_scores.get(new_state, 0):
                        cur_scores[new_state] = score
                        cur_span_idx[new_state] = span_idx
            max_scores.append(cur_scores)
            best_span_indices.append(cur_span_idx)

        # Backtrack to obtain optimal span choices.
        srl = []
        pos = max_len
        state = max(
            (score, state) for state, score in max_scores[max_len].items()
        )[1]
        while pos > 0:
            best_span_idx = best_span_indices[pos].get(state, None)
            if best_span_idx is not None:
                assert end_ids[best_span_idx] == pos - 1
                srl.append(
                    Span(
                        start_ids[best_span_idx],
                        end_ids[best_span_idx],
                        self.label_inverse_vocab[argmax_labels[best_span_idx]],
                    )
                )
                pos = start_ids[best_span_idx]
                if label_states[best_span_idx] != -1:
                    state &= ~label_states[best_span_idx]
            else:
                pos -= 1
        return srl
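
    # Worked example of the `_CORE_ARGS` bitmask used above: "A0" maps to
    # 1 << 0 = 1, "A2" to 1 << 2 = 4, and "A5" to 1 << 5 = 32, so a DP state
    # that has already selected {A0, A2, A5} is encoded as 1 + 4 + 32 = 37.
    # Proposing another "A2" span is rejected because 4 & 37 != 0, while an
    # "A1" span (bit value 2) is still allowed since 2 & 37 == 0.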

    @torch.no_grad()
    def decode(
        self, inputs: tx.data.Batch, enforce_constraint: bool = False
    ) -> List[Dict[int, List[Span]]]:
        r"""Performs optimal decoding with dynamic programming.

        :returns: A nested structure of SRL spans, representing the (inner)
            list of spans for each predicate (middle `dict`) and for each
            example in the batch (outer list).
        """
        result_dict = self.forward(inputs)
        start_ids = result_dict["start_ids"].cpu().numpy()
        end_ids = result_dict["end_ids"].cpu().numpy()
        predicates = result_dict["predicates"].cpu().numpy()

        batch_size, num_arguments = start_ids.shape
        num_predicates = predicates.shape[1]
        total_scores = result_dict["total_scores"].view(
            batch_size, num_predicates, num_arguments, -1
        )
        label_scores, argmax_label = torch.max(total_scores, dim=3)
        argmax_label = argmax_label.cpu().numpy()
        label_scores = label_scores.cpu().numpy()
        sent_lengths = inputs.length.cpu().numpy()

        # Do DP one example at a time...
        batch_srl = []
        for b_idx in range(batch_size):
            cur_srl: Dict[int, List[Span]] = {}
            # ... and one predicate at a time.
            for pred_idx, predicate in enumerate(predicates[b_idx]):
                if predicate >= inputs.length[b_idx]:
                    # Skip invalid predicates outside the sentence.
                    continue
                srl = self._dp_decode(
                    sent_lengths[b_idx],
                    predicate,
                    start_ids[b_idx],
                    end_ids[b_idx],
                    argmax_label[b_idx, pred_idx],
                    label_scores[b_idx, pred_idx],
                    enforce_constraint,
                )
                if len(srl) > 0:
                    cur_srl[predicate] = srl
            batch_srl.append(cur_srl)
        return batch_srl
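
# A minimal inference sketch (the `iterator` below is hypothetical; any
# `texar.torch` data iterator yielding batches with `text`, `text_ids`,
# `length`, and `srl` fields would do):
#
#     model.eval()
#     for batch in iterator:
#         batch_srl = model.decode(batch, enforce_constraint=True)
#         # `batch_srl[i][p]` lists the `Span`s predicted for the predicate
#         # at position `p` in the i-th example of the batch.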