Source code for procrustes.permutation

# -*- coding: utf-8 -*-
# The Procrustes library provides a set of functions for transforming
# a matrix to make it as similar as possible to a target matrix.
#
# Copyright (C) 2017-2025 The QC-Devs Community
#
# This file is part of Procrustes.
#
# Procrustes is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# Procrustes is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>
#
# --
"""Permutation Procrustes Module."""
from typing import Optional

import numpy as np
import scipy
from scipy.optimize import linear_sum_assignment

from procrustes.kopt import kopt_heuristic_double, kopt_heuristic_single
from procrustes.utils import ProcrustesResult, _zero_padding, compute_error, setup_input_arrays

__all__ = ["permutation", "permutation_2sided"]



[docs]
def permutation(
    a: np.ndarray,
    b: np.ndarray,
    pad: bool = True,
    translate: bool = False,
    scale: bool = False,
    unpad_col: bool = False,
    unpad_row: bool = False,
    check_finite: bool = True,
    weight: Optional[np.ndarray] = None,
) -> ProcrustesResult:
    r"""Perform one-sided permutation Procrustes.

    Given matrix :math:`\mathbf{A}_{m \times n}` and a reference matrix :math:`\mathbf{B}_{m \times
    n}`, find the permutation transformation matrix :math:`\mathbf{P}_{n \times n}`
    that makes :math:`\mathbf{AP}` as close as possible to :math:`\mathbf{B}`. In other words,

    .. math::
       \underbrace{\text{min}}_{\left\{\mathbf{P} \left| {[\mathbf{P}]_{ij} \in \{0, 1\} \atop
       \sum_{i=1}^n [\mathbf{P}]_{ij} = \sum_{j=1}^n [\mathbf{P}]_{ij} = 1} \right. \right\}}
       \|\mathbf{A} \mathbf{P} - \mathbf{B}\|_{F}^2

    This Procrustes method requires the :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices to
    have the same shape, which is guaranteed with the default ``pad=True`` argument for any given
    :math:`\mathbf{A}` and :math:`\mathbf{B}` matrices. In preparing the :math:`\mathbf{A}` and
    :math:`\mathbf{B}` matrices, the (optional) order of operations is: **1)** unpad zero
    rows/columns, **2)** translate the matrices to the origin, **3)** weight entries of
    :math:`\mathbf{A}`, **4)** scale the matrices to have unit norm, **5)** pad matrices with zero
    rows/columns so they have the same shape.

    Parameters
    ----------
    a : ndarray
        The 2D-array :math:`\mathbf{A}` which is going to be transformed.
    b : ndarray
        The 2D-array :math:`\mathbf{B}` representing the reference matrix.
    pad : bool, optional
        Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
        :math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
    translate : bool, optional
        If True, both arrays are centered at origin (columns of the arrays will have mean zero).
    scale : bool, optional
        If True, both arrays are normalized with respect to the Frobenius norm, i.e.,
        :math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and
        :math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`.
    unpad_col : bool, optional
        If True, zero columns (with values less than 1.0e-8) on the right-hand side are removed.
    unpad_row : bool, optional
        If True, zero rows (with values less than 1.0e-8) at the bottom are removed.
    check_finite : bool, optional
        If True, convert the input to an array, checking for NaNs or Infs.
    weight : ndarray, optional
        The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines the
        elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by :math:`\mathbf{A}`
        matrix, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`.

    Returns
    -------
    res : ProcrustesResult
        The Procrustes result represented as a class:`utils.ProcrustesResult` object.

    Notes
    -----
    The optimal :math:`n \times n` permutation matrix is obtained by,

    .. math::
        \mathbf{P}^{\text{opt}} =
        \arg \underbrace{\text{min}}_{\left\{\mathbf{P} \left| {[\mathbf{P}]_{ij} \in \{0, 1\}
        \atop \sum_{i=1}^n [\mathbf{P}]_{ij} = \sum_{j=1}^n [\mathbf{P}]_{ij} = 1} \right. \right\}}
            \|\mathbf{A} \mathbf{P} - \mathbf{B}\|_{F}^2
      = \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {[\mathbf{P}]_{ij} \in \{0, 1\}
        \atop \sum_{i=1}^n [\mathbf{P}]_{ij} = \sum_{j=1}^n [\mathbf{P}]_{ij} = 1} \right. \right\}}
            \text{Tr}\left[\mathbf{P}^\dagger\mathbf{A}^\dagger\mathbf{B} \right]

    The solution is found by relaxing the problem into a linear programming problem. The solution
    to a linear programming problem is always at the boundary of the allowed region. So,

    .. math::
       \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {[\mathbf{P}]_{ij} \in \{0, 1\}
       \atop \sum_{i=1}^n [\mathbf{P}]_{ij} = \sum_{j=1}^n [\mathbf{P}]_{ij} = 1} \right. \right\}}
          \text{Tr}\left[\mathbf{P}^\dagger\mathbf{A}^\dagger\mathbf{B} \right] =
       \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {[\mathbf{P}]_{ij} \geq 0
       \atop \sum_{i=1}^n [\mathbf{P}]_{ij} = \sum_{j=1}^n [\mathbf{P}]_{ij} = 1} \right. \right\}}
          \text{Tr}\left[\mathbf{P}^\dagger\left(\mathbf{A}^\dagger\mathbf{B}\right) \right]

    This is a matching problem and can be solved by the Hungarian algorithm. The cost matrix is
    defined as :math:`\mathbf{A}^\dagger\mathbf{B}` and the `scipy.optimize.linear_sum_assignment`
    is used to solve for the permutation that maximizes the linear sum assignment problem.

    """
    # check inputs
    new_a, new_b = setup_input_arrays(
        a,
        b,
        unpad_col,
        unpad_row,
        pad,
        translate,
        scale,
        check_finite,
        weight,
    )
    # if number of rows is less than column, the arrays are made square
    if (new_a.shape[0] < new_a.shape[1]) or (new_b.shape[0] < new_b.shape[1]):
        new_a, new_b = _zero_padding(new_a, new_b, "square")

    # compute cost matrix C = A.T B
    c = np.dot(new_a.T, new_b)
    # compute permutation matrix using Hungarian algorithm
    p = _compute_permutation_hungarian(c)
    # compute one-sided permutation error
    error = compute_error(new_a, new_b, p)

    return ProcrustesResult(new_a=new_a, new_b=new_b, t=p, error=error)




[docs]
def permutation_2sided(
    a: np.ndarray,
    b: np.ndarray,
    single: bool = True,
    method: str = "k-opt",
    guess_p1: Optional[np.ndarray] = None,
    guess_p2: Optional[np.ndarray] = None,
    pad: bool = False,
    unpad_col: bool = False,
    unpad_row: bool = False,
    translate: bool = False,
    scale: bool = False,
    check_finite: bool = True,
    options: Optional[dict] = None,
    weight: Optional[np.ndarray] = None,
    lapack_driver: str = "gesvd",
) -> ProcrustesResult:
    r"""Perform two-sided permutation Procrustes.

    Parameters
    ----------
    a : ndarray
        The 2D-array :math:`\mathbf{A}` which is going to be transformed.
    b : ndarray
        The 2D-array :math:`\mathbf{B}` representing the reference matrix.
    single : bool, optional
        If `True`, the single-transformation Procrustes is performed to obtain :math:`\mathbf{P}`.
        If `False`, the two-transformations Procrustes is performed to obtain :math:`\mathbf{P}_1`
        and :math:`\mathbf{P}_2`.
    method : str, optional
        The method to solve for permutation matrices. For `single=False`, these include "flip-flop"
        and "k-opt" methods. For `single=True`, these include "approx-normal1", "approx-normal2",
        "approx-umeyama", "approx-umeyama-svd", "k-opt", "soft-assign", and "nmf".
    guess_p1 : np.ndarray, optional
        Guess for :math:`\mathbf{P}_1` matrix given as a 2D-array. This is only required for the
        two-transformations case specified by setting `single=False`.
    guess_p2 : np.ndarray, optional
        Guess for :math:`\mathbf{P}_2` matrix given as a 2D-array.
    pad : bool, optional
        Add zero rows (at the bottom) and/or columns (to the right-hand side) of matrices
        :math:`\mathbf{A}` and :math:`\mathbf{B}` so that they have the same shape.
    unpad_col : bool, optional
        If True, zero columns (with values less than 1.0e-8) on the right-hand side are removed.
    unpad_row : bool, optional
        If True, zero rows (with values less than 1.0e-8) at the bottom are removed.
    translate : bool, optional
        If True, both arrays are centered at origin (columns of the arrays will have mean zero).
    scale : bool, optional
        If True, both arrays are normalized with respect to the Frobenius norm, i.e.,
        :math:`\text{Tr}\left[\mathbf{A}^\dagger\mathbf{A}\right] = 1` and
        :math:`\text{Tr}\left[\mathbf{B}^\dagger\mathbf{B}\right] = 1`.
    check_finite : bool, optional
        If True, convert the input to an array, checking for NaNs or Infs.
    options : dict, optional
       A dictionary of method options.
    weight : ndarray, optional
        The 1D-array representing the weights of each row of :math:`\mathbf{A}`. This defines the
        elements of the diagonal matrix :math:`\mathbf{W}` that is multiplied by :math:`\mathbf{A}`
        matrix, i.e., :math:`\mathbf{A} \rightarrow \mathbf{WA}`.
    lapack_driver : {'gesvd', 'gesdd'}, optional
        Whether to use the more efficient divide-and-conquer approach ('gesdd') or the more robust
        general rectangular approach ('gesvd') to compute the singular-value decomposition with
        `scipy.linalg.svd`.

    Returns
    -------
    res : ProcrustesResult
        The Procrustes result represented as a class:`utils.ProcrustesResult` object.

    Notes
    -----
    Given matrix :math:`\mathbf{A}_{n \times n}` and a reference :math:`\mathbf{B}_{n \times n}`,
    find a permutation of rows/columns of :math:`\mathbf{A}_{n \times n}` that makes it as close as
    possible to :math:`\mathbf{B}_{n \times n}`. I.e.,

    .. math::
        &\underbrace{\text{min}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
            \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
            \|\mathbf{P}^\dagger \mathbf{A} \mathbf{P} - \mathbf{B}\|_{F}^2\\
        = &\underbrace{\text{min}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
            \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
            \text{Tr}\left[\left(\mathbf{P}^\dagger\mathbf{A}\mathbf{P} - \mathbf{B} \right)^\dagger
            \left(\mathbf{P}^\dagger\mathbf{A}\mathbf{P} - \mathbf{B} \right)\right] \\
        = &\underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
            \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
            \text{Tr}\left[\mathbf{P}^\dagger\mathbf{A}^\dagger\mathbf{P}\mathbf{B} \right]\\

    Here, :math:`\mathbf{P}_{n \times n}` is the permutation matrix. Given an intial guess, the
    best local minimum can be obtained by the iterative procedure,

    .. math::
       p_{ij}^{(n + 1)} = p_{ij}^{(n)} \sqrt{ \frac{2\left[\mathbf{T}^{(n)}\right]_{ij}}{\left[
                          \mathbf{P}^{(n)} \left( \left(\mathbf{P}^{(n)}\right)^T \mathbf{T} +
                          \left( \left(\mathbf{P}^{(n)}\right)^T \mathbf{T} \right)^T  \right)
                          \right]_{ij}} }

    where,

    .. math::
       \mathbf{T}^{(n)} = \mathbf{A} \mathbf{P}^{(n)} \mathbf{B}

    Using an initial guess, the iteration can stops when the change in :math:`d` is below the
    specified threshold,

    .. math::
       d = \text{Tr} \left[\left(\mathbf{P}^{(n+1)} -\mathbf{P}^{(n)} \right)^T
                           \left(\mathbf{P}^{(n+1)} -\mathbf{P}^{(n)} \right)\right]

    The outcome of the iterative procedure :math:`\mathbf{P}^{(\infty)}` is not a permutation
    matrix. So, the closest permutation can be found by setting ``refinement=True``. This uses
    :class:`procrustes.permutation.PermutationProcrustes` to find the closest permutation; that is,

    .. math::
       \underbrace{\text{min}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                            \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
                            \|\mathbf{P} - \mathbf{P}^{(\infty)}\|_{F}^2
       = \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                            \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
         \text{Tr}\left[\mathbf{P}^\dagger\mathbf{P}^{(\infty)} \right]

    The answer to this problem is a heuristic solution for the matrix-matching problem that seems
    to be relatively accurate.

    **Initial Guess:**

    Two possible initial guesses are inferred from the Umeyama procedure. One can find either the
    closest permutation matrix to :math:`\mathbf{U}_\text{Umeyama}` or to
    :math:`\mathbf{U}_\text{Umeyama}^\text{approx.}`.

    Considering the :class:`procrustes.permutation.PermutationProcrustes`, the resulting permutation
    matrix can be specified as initial guess through ``guess=umeyama`` and ``guess=umeyama_approx``,
    which solves:

    .. math::
        \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                         \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
          \text{Tr}\left[\mathbf{P}^\dagger\mathbf{U}_\text{Umeyama} \right] \\
        \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                         \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
          \text{Tr}\left[\mathbf{P}^\dagger\mathbf{U}_\text{Umeyama}^\text{approx.} \right]

    Another choice is to start by solving a normal permutation Procrustes problem. In other words,
    write new matrices, :math:`\mathbf{A}^0` and :math:`\mathbf{B}^0`, with columns like,

    .. math::
       \begin{bmatrix}
        a_{ii} \\
        p \cdot \text{sgn}\left( a_{ij_\text{max}} \right)
                \underbrace{\text{max}}_{1 \le j \le n} \left(\left|a_{ij}\right|\right)\\
        p^2 \cdot \text{sgn}\left( a_{ij_{\text{max}-1}} \right)
                  \underbrace{\text{max}-1}_{1 \le j \le n} \left(\left|a_{ij}\right|\right)\\
        \vdots
       \end{bmatrix}

    Here, :math:`\text{max}-1` denotes the second-largest absolute value of elements,
    :math:`\text{max}-2` is the third-largest abosule value of elements, etc.

    The matrices :math:`\mathbf{A}^0` and :math:`\mathbf{B}^0` have the diagonal elements of
    :math:`\mathbf{A}` and :math:`\mathbf{B}` in the first row, and below the first row has the
    largest off-diagonal element in row :math:`i`, the second-largest off-diagonal element, etc.
    The elements are weighted by a factor :math:`0 < p < 1`, so that the smaller elements are
    considered less important for matching. The matrices can be truncated after a few terms; for
    example, after the size of elements falls below some threshold. A reasonable choice would be
    to stop after :math:`\lfloor \frac{-2\ln 10}{\ln p} +1\rfloor` rows; this ensures that the
    size of the elements in the last row is less than 1% of those in the first off-diagonal row.

    There are obviously many different ways to construct the matrices :math:`\mathbf{A}^0` and
    :math:`\mathbf{B}^0`. Another, even better, method would be to try to encode not only what the
    off-diagonal elements are, but which element in the matrix they correspond to. One could do that
    by not only listing the diagonal elements, but also listing the associated off-diagonal element.
    I.e., the columns of :math:`\mathbf{A}^0` and :math:`\mathbf{B}^0` would be,

    .. math::
       \begin{bmatrix}
        a_{ii} \\
        p \cdot a_{j_\text{max} j_\text{max}} \\
        p \cdot \text{sgn}\left( a_{ij_\text{max}} \right)
                \underbrace{\text{max}}_{1 \le j \le n} \left(\left|a_{ij}\right|\right)\\
        p^2 \cdot a_{j_{\text{max}-1} j_{\text{max}-1}} \\
        p^2 \cdot \text{sgn}\left( a_{ij_{\text{max}-1}} \right)
                  \underbrace{\text{max}-1}_{1 \le j \le n} \left(\left|a_{ij}\right|\right)\\
        \vdots
       \end{bmatrix}

    In this case, you would stop the procedure after
    :math:`m = \left\lfloor {\frac{{ - 4\ln 10}}{{\ln p}} + 1} \right \rfloor` rows.

    Then one uses the :class:`procrustes.permutation.PermutationProcrustes` to match the constructed
    matrices :math:`\mathbf{A}^0` and :math:`\mathbf{B}^0` instead of :math:`\mathbf{A}` and
    :math:`\mathbf{B}`. I.e.,

    .. math::
        \underbrace{\text{max}}_{\left\{\mathbf{P} \left| {p_{ij} \in \{0, 1\}
                         \atop \sum_{i=1}^n p_{ij} = \sum_{j=1}^n p_{ij} = 1} \right. \right\}}
          \text{Tr}\left[\mathbf{P}^\dagger \left(\mathbf{A^0}^\dagger\mathbf{B^0}\right)\right]

    Please note that the "umeyama_approx" might give inaccurate permutation
    matrix. More specificity, this is a approximated Umeyama method. One example
    we can give is that when we compute the permutation matrix that transforms
    :math:`A` to :math:`B`, the "umeyama_approx" method can not give the exact
    permutation transformation matrix while "umeyama", "normal1" and "normal2" do.

    .. math::
        A =
        \begin{bmatrix}
             4 &  5 & -3 &  3 \\
             5 &  7 &  3 & -5 \\
            -3 &  3 &  2 &  2 \\
             3 & -5 &  2 &  5 \\
        \end{bmatrix} \\
        B =
        \begin{bmatrix}
             73 &  100 &   73 &  -62 \\
            100 &  208 & -116 &  154 \\
             73 & -116 &  154 &  100 \\
            -62 &  154 &  100 &  127 \\
        \end{bmatrix} \\

    """
    # check single argument
    if not isinstance(single, bool):
        raise TypeError(f"Argument single is not a boolean! Given type={type(single)}")

    # check inputs
    new_a, new_b = setup_input_arrays(
        a, b, unpad_col, unpad_row, pad, translate, scale, check_finite, weight
    )

    # check that A & B are square in case of single transformation
    if single and new_a.shape[0] != new_a.shape[1]:
        raise ValueError(
            f"For single={single}, matrix A should be square but A.shape={new_a.shape}"
            "Check pad, unpad_col, and unpad_row arguments."
        )

    if single and new_b.shape[0] != new_b.shape[1]:
        raise ValueError(
            f"For single={single}, matrix B should be square but B.shape={new_b.shape}"
            "Check pad, unpad_col, and unpad_row arguments."
        )

    # print a statement if user-specified guess is not used
    if method.startswith("approx") and guess_p1 is not None:
        print(f"Method={method} does not use an initial guess, so guess_p1 is ignored!")
    if method.startswith("approx") and guess_p2 is not None:
        print(f"Method={method} does not use an initial guess, so guess_p2 is ignored!")

    # get the number of rows & columns of matrix A
    m, n = new_a.shape

    # assign & check initial guess for P1
    if single and guess_p1 is not None:
        raise ValueError(f"For single={single}, P1 is transpose of P2, so guess_p1 should be None.")
    if not single:
        if guess_p1 is None:
            guess_p1 = np.eye(m)
        if guess_p1.shape != (m, m):
            raise ValueError(f"Argument guess_p1 should be either None or a ({m}, {m}) array.")

    # assign & check initial guess for P2
    if guess_p2 is None:
        guess_p2 = np.eye(n)
    if guess_p2.shape != (n, n):
        raise ValueError(f"Argument guess_p2 should be either None or a ({n}, {n}) array.")

    # check options dictionary & assign default keys
    defaults = {"tol": 1.0e-8, "maxiter": 500, "k": 3}
    if options is not None:
        if not isinstance(options, dict):
            raise ValueError(f"Argument options should be a dictionary. Given type={type(options)}")
        # pylint: disable=C0201
        if not all(k in defaults.keys() for k in options.keys()):
            raise ValueError(
                f"Argument options should only have {defaults.keys()} keys. "
                f"Given options contains {options.keys()} keys!"
            )
        # update defaults dictionary to use the specified options
        defaults.update(options)

    # 2-sided permutation Procrustes with two transformations
    # -------------------------------------------------------
    if not single:
        if method == "flip-flop":
            # compute permutations using flip-flop algorithm
            perm1, perm2, error = _permutation_2sided_2trans_flipflop(
                new_a, new_b, defaults["tol"], defaults["maxiter"], guess_p1, guess_p2
            )
        elif method == "k-opt":
            # compute permutations using k-opt heuristic search
            fun_error = lambda p1, p2: compute_error(new_a, new_b, p2, p1.T)
            perm1, perm2, error = kopt_heuristic_double(
                fun_error, p1=guess_p1, p2=guess_p2, k=defaults["k"]
            )
        else:
            raise ValueError(f"Method={method} not supported for single={single} transformation!")

        return ProcrustesResult(error=error, new_a=new_a, new_b=new_b, t=perm2, s=perm1)

    # 2-sided permutation Procrustes with one transformation
    # ------------------------------------------------------
    # The (un)directed iterative procedure for finding the permutation matrix takes the square
    # root of the matrix entries, which can result in complex numbers if the entries are
    # negative. To avoid this, all matrix entries are shifted (by the smallest amount) to be
    # positive. This causes no change to the objective function, as it's a constant value
    # being added to all entries of a and b.
    shift = 1.0e-6
    if np.min(new_a) < 0 or np.min(new_b) < 0:
        shift += abs(min(np.min(new_a), np.min(new_b)))
    # shift is a float, so even if new_a or new_b are ints, the positive matrices are floats
    # default shift is not zero to avoid division by zero later in the algorithm
    pos_a = new_a + shift
    pos_b = new_b + shift

    if method == "approx-normal1":
        tmp_a = _approx_permutation_2sided_1trans_normal1(a)
        tmp_b = _approx_permutation_2sided_1trans_normal1(b)
        perm = permutation(tmp_a, tmp_b).t

    elif method == "approx-normal2":
        tmp_a = _approx_permutation_2sided_1trans_normal2(a)
        tmp_b = _approx_permutation_2sided_1trans_normal2(b)
        perm = permutation(tmp_a, tmp_b).t

    elif method == "approx-umeyama":
        perm = _approx_permutation_2sided_1trans_umeyama(pos_a, pos_b)

    elif method == "approx-umeyama-svd":
        perm = _approx_permutation_2sided_1trans_umeyama_svd(a, b, lapack_driver)

    elif method == "k-opt":
        fun_error = lambda p: compute_error(pos_a, pos_b, p, p.T)
        perm, error = kopt_heuristic_single(fun_error, p0=guess_p2, k=defaults["k"])

    elif method == "soft-assign":
        raise NotImplementedError

    elif method == "nmf":
        # check whether A & B are symmetric (within a relative & absolute tolerance)
        is_pos_a_symmetric = np.allclose(pos_a, pos_a.T, rtol=1.0e-05, atol=1.0e-08)
        is_pos_b_symmetric = np.allclose(pos_b, pos_b.T, rtol=1.0e-05, atol=1.0e-08)

        if is_pos_a_symmetric and is_pos_b_symmetric:
            # undirected graph matching problem (iterative procedure)
            perm = _permutation_2sided_1trans_undirected(
                pos_a, pos_b, guess_p2, defaults["tol"], defaults["maxiter"]
            )
        else:
            # directed graph matching problem (iterative procedure)
            perm = _permutation_2sided_1trans_directed(
                pos_a, pos_b, guess_p2, defaults["tol"], defaults["maxiter"]
            )
    else:
        raise ValueError(f"Method={method} not supported for single={single} transformation!")

    # some of the methods for 2-sided-1-transformation permutation procrustes does not produce a
    # permutation matrix. So, their output is treated like a guess, and the closest permutation
    # matrix is found using 1-sided permutation procrustes (where A=I & B=perm)
    # Even though this step is not needed for ALL methods (e.g. k-opt, normal1, & normal2), to
    # make the code simple, this step is performed for all methods as its cost is negligible.
    perm = permutation(
        np.eye(perm.shape[0]),
        perm,
        translate=False,
        scale=False,
        unpad_col=False,
        unpad_row=False,
        check_finite=True,
    ).t
    # compute error
    error = compute_error(new_a, new_b, t=perm, s=perm.T)

    return ProcrustesResult(error=error, new_a=new_a, new_b=new_b, t=perm, s=perm.T)



def _permutation_2sided_2trans_flipflop(
    n: np.ndarray,
    m: np.ndarray,
    tol: float,
    max_iter: int,
    p0: Optional[np.ndarray] = None,
    q0: Optional[np.ndarray] = None,
):
    # two-sided permutation Procrustes with 2 transformations :math:` {\(\vert PNQ-M \vert\)}^2_F`
    # taken from page 64 in parallel solution of svd-related problems, with applications
    # Pythagoras Papadimitriou, University of Manchester, 1993

    # initial guesses: set P1 to identity if guess P0 is not given, and compute Q1 using 1-sided
    # permutation procrustes where A=(P1N), B=M, & cost = A.T B
    p1 = p0
    if p1 is None:
        p1 = np.eye(m.shape[0])
    q1 = _compute_permutation_hungarian(np.dot(np.dot(n.T, p1.T), m))
    # compute initial error1 = |(P1)N(Q1) - M|
    error1 = compute_error(n, m, q1, p1)

    step = 0
    while error1 > tol and step < max_iter:
        # update P1 using 1-sided permutation procrustes where A=(NQ1).T, B=M.T, & cost = A.T B
        # 1-sided procrustes finds the right-hand-side transformation T, so to solve for P1, one
        # needs to minimize |Q.T N.T P.T - M.T| which is the same as original objective function.
        p1 = _compute_permutation_hungarian(np.dot(np.dot(n, q1), m.T)).T
        # update Q1 using 1-sided permutation procrustes where A=(P1N).T, B=M, & cost = A.T B
        q1 = _compute_permutation_hungarian(np.dot(np.dot(n.T, p1.T), m))
        error1 = compute_error(n, m, q1, p1)
        step += 1
    if step == max_iter:
        print(f"Maximum iterations reached in 1st case of flip-flop! error={error1} & tol={tol}")

    # initial guesses: set Q2 to identity if guess Q0 is not given, and compute P2 using 1-sided
    # permutation procrustes where A=(NQ2).T, B=M.T, & cost = A.T B
    q2 = q0
    if q2 is None:
        q2 = np.eye(m.shape[1])
    p2 = _compute_permutation_hungarian(np.dot(np.dot(n, q2), m.T)).T
    # compute initial error2 = |(P2)N(Q2) - M|
    error2 = compute_error(n, m, q2, p2)

    step = 0
    while error2 > tol and step < max_iter:
        # update Q2 using 1-sided permutation procrustes where A=(P2N), B=M, & cost = A.T B
        q2 = _compute_permutation_hungarian(np.dot(np.dot(n.T, p2.T), m))
        # update P2 using 1-sided permutation procrustes where A=(NQ2).T, B=M.T, & cost = A.T B
        p2 = _compute_permutation_hungarian(np.dot(np.dot(n, q2), m.T)).T
        error2 = compute_error(n, m, q2, p2)
        step += 1
    if step == max_iter:
        print(f"Maximum iterations reached in 2nd case of flip-flop! error={error1} & tol={tol}")

    # return permutations corresponding to the lowest error
    if error1 <= error2:
        return p1, q1, error1
    return p2, q2, error2


def _compute_permutation_hungarian(cost_matrix: np.ndarray) -> np.ndarray:
    # solve linear sum assignment problem to get the row/column indices of optimal assignment
    row_ind, col_ind = linear_sum_assignment(cost_matrix, maximize=True)
    # make the permutation matrix by setting the corresponding elements to 1
    perm = np.zeros(cost_matrix.shape)
    perm[(row_ind, col_ind)] = 1
    return perm


def _approx_permutation_2sided_1trans_normal1(a: np.ndarray) -> np.ndarray:
    # This assumes that array_a has all positive entries, this guess does not match that found
    #    in the notes/paper because it doesn't include the sign function.
    # build the empty target array
    array_c = np.zeros(a.shape)
    # Fill the first row of array_c with diagonal entries
    array_c[0, :] = a.diagonal()
    array_mask = ~np.eye(a.shape[0], dtype=bool)
    # get all the non-diagonal element
    array_c_non_diag = (a[array_mask]).T.reshape(a.shape[0], a.shape[1] - 1)
    array_c_non_diag = array_c_non_diag[
        np.arange(np.shape(array_c_non_diag)[0])[:, np.newaxis], np.argsort(abs(array_c_non_diag))
    ]

    # form the right format in order to combine with matrix A
    array_c_sorted = np.fliplr(array_c_non_diag).T
    # fill the array_c with array_c_sorted
    array_c[1:, :] = array_c_sorted
    # the weight matrix
    weight_c = np.zeros(a.shape)
    weight_p = np.power(2, -0.5)

    for weight in range(a.shape[0]):
        weight_c[weight, :] = np.power(weight_p, weight)
    # build the new matrix array_new
    array_new = np.multiply(array_c, weight_c)

    return array_new


def _approx_permutation_2sided_1trans_normal2(a: np.ndarray) -> np.ndarray:
    # This assumes that array_a has all positive entries, this guess does not match that found
    #    in the notes/paper because it doesn't include the sign function.
    array_mask_a = ~np.eye(a.shape[0], dtype=bool)
    # array_off_diag0 is the off diagonal elements of A
    array_off_diag = a[array_mask_a].reshape((a.shape[0], a.shape[1] - 1))
    # array_off_diag1 is sorted off diagonal elements of A
    array_off_diag = array_off_diag[
        np.arange(np.shape(array_off_diag)[0])[:, np.newaxis], np.argsort(abs(array_off_diag))
    ]
    array_off_diag = np.fliplr(array_off_diag).T

    # array_c is newly built matrix B without weights
    # build array_c with the expected shape
    col_num_new = a.shape[0] * 2 - 1
    array_c = np.zeros((col_num_new, a.shape[1]))
    array_c[0, :] = a.diagonal()

    # use inf to represent the diagonal element
    a_inf = a - np.diag(np.diag(a)) + np.diag([-np.inf] * a.shape[0])
    index_inf = np.argsort(-np.abs(a_inf), axis=1)

    # the weight matrix
    weight_p = np.power(2, -0.5)
    weight_c = np.zeros((col_num_new, a.shape[1]))
    weight_c[0, :] = np.power(weight_p, 0)

    for index_col in range(1, a.shape[0]):
        # the index_col*2 row of array_c
        array_c[index_col * 2, :] = array_off_diag[index_col - 1, :]
        # the index_col*2-1 row of array_c
        array_c[index_col * 2 - 1, :] = a[index_inf[:, index_col], index_inf[:, index_col]]

        # the index_col*2 row of weight_c
        weight_c[index_col * 2, :] = np.power(weight_p, index_col)
        # the index_col*2 row of weight_c
        weight_c[index_col * 2 - 1, :] = np.power(weight_p, index_col)

    # the new matrix B
    array_new = np.multiply(array_c, weight_c)
    return array_new


def _approx_permutation_2sided_1trans_umeyama(a: np.ndarray, b: np.ndarray) -> np.ndarray:
    # check whether A & B are symmetric (within a relative & absolute tolerance)
    is_a_symmetric = np.allclose(a, a.T, rtol=1.0e-05, atol=1.0e-08)
    is_b_symmetric = np.allclose(b, b.T, rtol=1.0e-05, atol=1.0e-08)
    # symmetrize A & B if not symmetric
    if not (is_a_symmetric and is_b_symmetric):
        a = _symmetrize_matrix(a)
        b = _symmetrize_matrix(b)

    # compute normalized normalized eigenvector of A & B
    # in some cases, A and B can be complex matrix (when symmetrizing non-symmetric A & B),
    # the np.linalg.eigh returns the eigenvalues and eigenvectors of a complex Hermitian
    # (conjugate symmetric) or a real symmetric matrix.
    _, ua = np.linalg.eigh(a)
    _, ub = np.linalg.eigh(b)
    # for complex input, x + iy, the absolute value is np.sqrt(x**2 + y**2)
    u_umeyama = np.dot(np.abs(ua), np.abs(ub.T))
    return u_umeyama


def _approx_permutation_2sided_1trans_umeyama_svd(
    a: np.ndarray, b: np.ndarray, lapack_driver: str
) -> np.ndarray:
    # compute u_umeyama
    perm = _approx_permutation_2sided_1trans_umeyama(a, b)
    # compute approximated umeyama matrix
    u, _, vt = scipy.linalg.svd(perm, lapack_driver=lapack_driver)
    u_umeyama_approx = np.dot(np.abs(u), np.abs(vt))
    return u_umeyama_approx


def _symmetrize_matrix(a: np.ndarray) -> np.ndarray:
    # symmetrized matrix A would be complex
    return (a + a.T) * 0.5 + (a - a.T) * 0.5 * 1j


def _permutation_2sided_1trans_undirected(
    a: np.ndarray, b: np.ndarray, guess: np.ndarray, tol: float, iteration: int
) -> np.ndarray:
    """Solve for 2-sided permutation Procrustes with 1-transformation when A & B are symmetric."""

    p_old = guess
    change = np.inf
    step = 0

    while change > tol and step < iteration:
        # compute alpha matrix
        temp = np.dot(a, np.dot(p_old, b))
        alpha = np.dot(p_old.T, temp)
        alpha = (alpha + alpha.T) / 2
        # compute new permutation matrix & change
        p_new = p_old * np.sqrt(temp / np.dot(p_old, alpha))
        change = np.trace(np.dot((p_new - p_old).T, (p_new - p_old)))
        # update permutation matrix
        p_old = p_new
        step += 1

    if step == iteration:
        print(f"Maximum iteration reached! change={change} & tolerance={tol}")

    return p_new


def _permutation_2sided_1trans_directed(
    a: np.ndarray, b: np.ndarray, guess: np.ndarray, tol: float, iteration: int
) -> np.ndarray:
    """Solve for 2-sided permutation Procrustes with 1-transformation."""

    # Algorithm 2 from Appendix of Procrustes paper
    p_old = guess
    change = np.inf
    step = 0
    while change > tol and step < iteration:
        # compute alpha matrix
        tmp1 = np.dot(a, np.dot(p_old, b.T))
        tmp2 = np.dot(a.T, np.dot(p_old, b))
        alpha = 0.25 * np.dot(p_old.T, tmp1 + tmp2) + np.dot((tmp1 + tmp2).T, p_old)
        # compute new permutation matrix & change
        p_new = p_old * np.sqrt((tmp1 + tmp2) / (2 * np.dot(p_old, alpha)))
        change = np.trace(np.dot((p_new - p_old).T, (p_new - p_old)))
        # update permutation matrix
        p_old = p_new
        step += 1

    if step == iteration:
        print(f"Maximum iteration reached! change={change} & tolerance={tol}")

    return p_new