# Source code for bayespy.utils.linalg

```################################################################################
# Copyright (C) 2011-2014 Jaakko Luttinen
#
################################################################################

"""
General numerical functions and methods.

"""

import itertools
import numpy as np
import scipy as sp
#import scipy.linalg.decomp_cholesky as decomp
import scipy.linalg as linalg
import scipy.special as special
import scipy.optimize as optimize
import scipy.sparse as sparse
#import scikits.sparse.cholmod as cholmod

# THIS IS SOME NEW GENERALIZED UFUNC FOR LINALG FEATURE, NOT IN OFFICIAL NUMPY
# REPO YET
#import numpy.linalg._gufuncs_linalg as gula
#import numpy.core.gufuncs_linalg as gula

from . import misc

[docs]def chol(C, ndim=1):
if sparse.issparse(C):
if ndim != 1:
raise NotImplementedError()
# Sparse Cholesky decomposition (returns a Factor object)
return cholmod.cholesky(C)
else:
# Computes Cholesky decomposition for a collection of matrices.
# The last 2*ndim axes of C are considered as the matrix.
if ndim == 0:
return np.sqrt(C)
shape_original = np.shape(C)[-ndim:]
C = (
C if ndim == 1 else
misc.flatten_axes(C, ndim, ndim)
)
if np.shape(C)[-1] != np.shape(C)[-2]:
raise ValueError("Not square matrix w.r.t. ndim sense")
U = np.empty(np.shape(C))
for i in misc.nested_iterator(np.shape(U)[:-2]):
try:
# Handle (0, 0) matrices. See:
# https://github.com/scipy/scipy/issues/8056
U[i] = (
np.empty((0,0)) if np.size(C[i]) == 0 else
linalg.cho_factor(C[i])[0]
)
except np.linalg.linalg.LinAlgError:
raise Exception("Matrix not positive definite")
return (
U if ndim == 1 else
misc.reshape_axes(U, shape_original, shape_original)
)

[docs]def chol_solve(U, b, out=None, matrix=False, ndim=1):
if isinstance(U, np.ndarray) or np.isscalar(U):
if sparse.issparse(b):
b = b.toarray()

if ndim == 0:
return (b / U) / U

shape = np.shape(U)[-ndim:]
U = (
U if ndim == 1 else
misc.flatten_axes(U, ndim, ndim)
)

if matrix:
shape_b = np.shape(b)[-ndim:]
B = (
b if ndim == 1 else
misc.flatten_axes(b, ndim, ndim)
)
B = transpose(B, ndim=1)
U = U[...,None,:,:]
else:
B = (
b if ndim == 1 else
misc.flatten_axes(b, ndim)
)

# Allocate memory
sh_u = U.shape[:-2]
sh_b = B.shape[:-1]
l_u = len(sh_u)
l_b = len(sh_b)

# Check which axis are iterated over with B along with U
ind_b = [slice(None)] * l_b
l_min = min(l_u, l_b)
jnd_b = tuple(i for i in range(-l_min,0) if sh_b[i]==sh_u[i])

if out == None:
# Shape of the result (broadcasting rules)
#out = np.zeros(np.shape(B))
out = np.zeros(sh + B.shape[-1:])

for i in misc.nested_iterator(np.shape(U)[:-2]):

# The goal is to run Cholesky solver once for all vectors of B
# for which the matrices of U are the same (according to the
# broadcasting rules). Thus, we collect all the axes of B for
# which U is singleton and form them as a 2-D matrix and then
# run the solver once.

# Select those axes of B for which U and B are not singleton
for j in jnd_b:
ind_b[j] = i[j]

# Collect all the axes for which U is singleton
b = B[tuple(ind_b) + (slice(None),)]

# Reshape it to a 2-D (or 1-D) array
orig_shape = b.shape
if b.ndim > 1:
b = b.reshape((-1, b.shape[-1]))

# slice(None) to all preceeding axes and ellipsis for the last
# axis:
if len(ind_b) < len(sh):
ind_out = (slice(None),) + tuple(ind_b) + (slice(None),)
else:
ind_out = tuple(ind_b) + (slice(None),)

out[ind_out] = (
# Handle (0, 0) matrices. See:
# https://github.com/scipy/scipy/issues/8056
np.empty(orig_shape) if np.size(U[i]) == 0 else
linalg.cho_solve(
(U[i], False),
b.T
).T.reshape(orig_shape)
)

if matrix:
out = transpose(out, ndim=1)
out = (
out if ndim == 1 else
misc.reshape_axes(out, shape, shape_b)
)
else:
out = (
out if ndim == 1 else
misc.reshape_axes(out, shape)
)

return out

elif isinstance(U, cholmod.Factor):
if ndim != 1:
raise NotImplementedError()
if matrix:
raise NotImplementedError()
if sparse.issparse(b):
b = b.toarray()
return U.solve_A(b)
else:
raise ValueError("Unknown type of Cholesky factor")

[docs]def chol_inv(U, ndim=1):
if isinstance(U, np.ndarray) or np.isscalar(U):
if ndim == 0:
return (1 / U) / U
shape = np.shape(U)[-ndim:]
U = (
U if ndim == 1 else
misc.flatten_axes(U, ndim, ndim)
)
# Allocate memory
V = np.tile(np.identity(np.shape(U)[-1]), np.shape(U)[:-2]+(1,1))
for i in misc.nested_iterator(np.shape(U)[:-2]):
V[i] = (
# Handle (0, 0) matrices. See:
# https://github.com/scipy/scipy/issues/8056
np.empty((0, 0)) if np.size(V[i]) == 0 else
linalg.cho_solve(
(U[i], False),
V[i],
overwrite_b=True # This would need Fortran order
)
)
V = (
V if ndim == 1 else
misc.reshape_axes(V, shape, shape)
)
return V

elif isinstance(U, cholmod.Factor):
raise NotImplementedError
if ndim != 1:
raise NotImplementedError()
else:
raise ValueError("Unknown type of Cholesky factor")

[docs]def chol_logdet(U, ndim=1):
if isinstance(U, np.ndarray) or np.isscalar(U):
if ndim == 0:
return 2 * np.log(U)
U = (
U if ndim == 1 else
misc.flatten_axes(U, ndim, ndim)
)
return 2*np.sum(np.log(np.einsum('...ii->...i',U)), axis=-1)
elif isinstance(U, cholmod.Factor):
if ndim != 1:
raise NotImplementedError()
return np.sum(np.log(U.D()))
else:
raise ValueError("Unknown type of Cholesky factor")

[docs]def logdet_chol(U):
if isinstance(U, np.ndarray):
# Computes Cholesky decomposition for a collection of matrices.
return 2*np.sum(np.log(np.einsum('...ii->...i', U)), axis=(-1,))
elif isinstance(U, cholmod.Factor):
return np.sum(np.log(U.D()))

[docs]def logdet_tri(R):
"""
Logarithm of the absolute value of the determinant of a triangular matrix.
"""
return np.sum(np.log(np.abs(np.einsum('...ii->...i', R))))

[docs]def logdet_cov(C, ndim=1):
return chol_logdet(chol(C, ndim=ndim), ndim=ndim)

[docs]def solve_triangular(U, B, ndim=1, **kwargs):
if ndim != 1:
raise NotImplementedError("Not yet implemented for ndim!=1")
# Allocate memory
U = np.atleast_2d(U)
B = np.atleast_1d(B)
sh_u = U.shape[:-2]
sh_b = B.shape[:-1]
l_u = len(sh_u)
l_b = len(sh_b)

# Check which axis are iterated over with B along with U
ind_b = [slice(None)] * l_b
l_min = min(l_u, l_b)
jnd_b = tuple(i for i in range(-l_min,0) if sh_b[i]==sh_u[i])

# Shape of the result (broadcasting rules)
out = np.zeros(sh + B.shape[-1:])
for i in misc.nested_iterator(np.shape(U)[:-2]):

# The goal is to run triangular solver once for all vectors of
# B for which the matrices of U are the same (according to the
# broadcasting rules). Thus, we collect all the axes of B for
# which U is singleton and form them as a 2-D matrix and then
# run the solver once.

# Select those axes of B for which U and B are not singleton
for j in jnd_b:
ind_b[j] = i[j]

# Collect all the axes for which U is singleton
b = B[tuple(ind_b) + (slice(None),)]

# Reshape it to a 2-D (or 1-D) array
orig_shape = b.shape
if b.ndim > 1:
b = b.reshape((-1, b.shape[-1]))

# slice(None) to all preceeding axes and ellipsis for the last
# axis:
if len(ind_b) < len(sh):
ind_out = (slice(None),) + tuple(ind_b) + (slice(None),)
else:
ind_out = tuple(ind_b) + (slice(None),)

out[ind_out] = linalg.solve_triangular(U[i],
b.T,
**kwargs).T.reshape(orig_shape)

return out

[docs]def inner(*args, ndim=1):
"""
Compute inner product.

The number of arrays is arbitrary.  The number of dimensions is arbitrary.
"""
axes = tuple(range(-ndim,0))
return misc.sum_product(*args, axes_to_sum=axes)

[docs]def outer(A, B, ndim=1):
"""
Computes outer product over the last axes of A and B.

The other axes are broadcasted. Thus, if A has shape (..., N) and B has
shape (..., M), then the result has shape (..., N, M).

Using the argument `ndim` it is possible to change that how many axes
trailing axes are used for the outer product. For instance, if ndim=3, A and
B have shapes (...,N1,N2,N3) and (...,M1,M2,M3), the result has shape
(...,N1,M1,N2,M2,N3,M3).
"""
if not isinstance(ndim, int) or ndim < 0:
raise ValueError('ndim must be non-negative integer')
if ndim > 0:
if ndim > np.ndim(A):
raise ValueError('Argument ndim larger than ndim of the first '
'parameter')
if ndim > np.ndim(B):
raise ValueError('Argument ndim larger than ndim of the second '
'parameter')
shape_A = np.shape(A) + (1,)*ndim
shape_B = np.shape(B)[:-ndim] + (1,)*ndim + np.shape(B)[-ndim:]
A = np.reshape(A, shape_A)
B = np.reshape(B, shape_B)
return np.asanyarray(A) * np.asanyarray(B)

def _dot(A, B):
"""
Dot product which handles broadcasting properly.

Future NumPy will have a better built-in implementation for this.
"""
A_plates = np.shape(A)[:-2]
B_plates = np.shape(B)[:-2]
M = np.shape(A)[-2]
N = np.shape(B)[-1]
if Y_plates == ():
return np.dot(A, B)
indices = misc.nested_iterator(Y_plates)
Y_shape = Y_plates + (M, N)
Y = np.zeros(Y_shape)
for i in indices:
Y[i] = np.dot(A[misc.safe_indices(i, A_plates)],
B[misc.safe_indices(i, B_plates)])
return Y

[docs]def dot(*arrays):
"""
Compute matrix-matrix product.

You can give multiple arrays, the dot product is computed from left to
right: A1*A2*A3*...*AN. The dot product is computed over the last two axes
of each arrays. All other axes must be broadcastable.
"""
if len(arrays) == 0:
return 0
else:
Y = np.asanyarray(arrays[0])
for X in arrays[1:]:
X = np.asanyarray(X)
if np.ndim(Y) < 2 or np.ndim(X) < 2:
raise ValueError("Must be at least 2-D arrays")
if np.shape(Y)[-1] != np.shape(X)[-2]:
raise ValueError("Dimensions do not match")
# Replace this with numpy.dot when NumPy implements broadcasting in dot
Y = _dot(Y, X)
#Y = np.einsum('...ik,...kj->...ij', Y, X)
#Y = gula.matrix_multiply(Y, X)
return Y

[docs]def tracedot(A, B):
"""
Computes trace(A*B).
"""
return np.einsum('...ij,...ji->...', A, B)

[docs]def inv(A, ndim=1):
"""
General array inversion.

Supports broadcasting and inversion of multidimensional arrays.  For
instance, an array with shape (4,3,2,3,2) could mean that there are four
(3*2) x (3*2) matrices to be inverted. This can be done by inv(A, ndim=2).
For inverting scalars, ndim=0. For inverting matrices, ndim=1.
"""
A = np.asanyarray(A)
if ndim == 0:
return 1 / A
elif ndim == 1:
return np.linalg.inv(A)
else:
raise NotImplementedError()

[docs]def mvdot(A, b, ndim=1):
"""
Compute matrix-vector product.

"""
# TODO/FIXME: A bug in inner1d:
# https://github.com/numpy/numpy/issues/3338
#
# b = np.asanyarray(b)
# return gula.inner1d(A, b[...,np.newaxis,:])
#
if ndim > 0:

return inner(A, b, ndim=ndim)
## if ndim != 1:
##     raise NotImplementedError("mvdot not yet implemented for ndim!=1")

## return _dot(A, b[...,None])[...,0]
## #return np.einsum('...ik,...k->...i', A, b)

[docs]def mmdot(A, B, ndim=1):
"""
Compute matrix-matrix product.

"""
if ndim == 0:
return A * B
elif ndim == 1:
return _dot(A, B)
else:
raise Exception("mmdot not yet implemented for ndim>1")
#return np.einsum('...ik,...kj->...ij', A, B)

[docs]def transpose(X, ndim=1):
"""
Transpose the matrix.
"""
for n in range(ndim):
X = np.swapaxes(X, -1-n, -1-ndim-n)
return X
## if ndim != 1:
##     raise Exception("transpose not yet implemented for ndim!=1")
## return np.swapaxes(X, -1, -2)

[docs]def m_dot(A,b):
raise DeprecationWarning()
# Compute matrix-vector product over the last two axes of A and
# the last axes of b.  Other axes are broadcasted. If A has shape
# (..., M, N) and b has shape (..., N), then the result has shape
# (..., M)

#b = reshape(b, shape(b)[:-1] + (1,) + shape(b)[-1:])
#return np.dot(A, b)
return np.einsum('...ik,...k->...i', A, b)
# TODO: Use einsum!!
#return np.sum(A*b[...,np.newaxis,:], axis=(-1,))

[docs]def block_banded_solve(A, B, y):
"""
Invert symmetric, banded, positive-definite matrix.

A contains the diagonal blocks.

B contains the superdiagonal blocks (their transposes are the
subdiagonal blocks).

Shapes:
A: (...,   N, D, D)
B: (..., N-1, D, D)
y: (...,   N,    D)

The algorithm is basically LU decomposition.

Computes only the diagonal and super-diagonal blocks of the
inverse. The true inverse is dense, in general.

Assume each block has the same size.

Return:
* inverse blocks
* solution to the system
* log-determinant
"""

# Number of time instance and dimensionality
N = np.shape(y)[-2]
D = np.shape(y)[-1]

# Check the shape of the diagonal blocks
if np.shape(A)[-3] != N:
raise ValueError("The number of diagonal blocks is incorrect")
if np.shape(A)[-2:] != (D,D):
raise ValueError("The diagonal blocks have wrong shape")

# Check the shape of the super-diagonal blocks
if np.shape(B)[-3] != N-1:
raise ValueError("The number of super-diagonal blocks is incorrect")
if np.shape(B)[-2:] != (D,D):
raise ValueError("The diagonal blocks have wrong shape")

np.shape(B)[:-3])
np.shape(y)[:-2])

V = np.empty(plates_VC+(N,D,D))
C = np.empty(plates_VC+(N-1,D,D))
x = np.empty(plates_y+(N,D))

#
# Forward recursion
#

# In the forward recursion, store the Cholesky factor in V. So you
# don't need to recompute them in the backward recursion.

# TODO: This whole algorithm could be implemented as in-place operation.
# Might be a nice feature (optional?)

# TODO/FIXME: chol_solve has quite a high overhead because it uses shape
# manipulations. Use some more raw method instead.

x[...,0,:] = y[...,0,:]
V[...,0,:,:] = chol(A[...,0,:,:])
ldet = chol_logdet(V[...,0,:,:])
for n in range(N-1):
# Compute the solution of the system
x[...,n+1,:] = (y[...,n+1,:]
- mvdot(misc.T(B[...,n,:,:]),
chol_solve(V[...,n,:,:],
x[...,n,:])))
# Compute the superdiagonal block of the inverse
C[...,n,:,:] = chol_solve(V[...,n,:,:],
B[...,n,:,:],
matrix=True)
# Compute the diagonal block
V[...,n+1,:,:] = (A[...,n+1,:,:]
- mmdot(misc.T(B[...,n,:,:]), C[...,n,:,:]))
# Ensure symmetry by 0.5*(V+V.T)
V[...,n+1,:,:] = 0.5 * (V[...,n+1,:,:] + misc.T(V[...,n+1,:,:]))
# Compute and store the Cholesky factor of the diagonal block
V[...,n+1,:,:] = chol(V[...,n+1,:,:])
# Compute the log-det term here, too
ldet += chol_logdet(V[...,n+1,:,:])

#
# Backward recursion
#
x[...,-1,:] = chol_solve(V[...,-1,:,:], x[...,-1,:])
V[...,-1,:,:] = chol_inv(V[...,-1,:,:])
for n in reversed(range(N-1)):
# Compute the solution of the system
x[...,n,:] = chol_solve(V[...,n,:,:],
x[...,n,:] - mvdot(B[...,n,:,:],
x[...,n+1,:]))
# Compute the diagonal block of the inverse
V[...,n,:,:] = (chol_inv(V[...,n,:,:])
+ mmdot(C[...,n,:,:],
mmdot(V[...,n+1,:,:],
misc.T(C[...,n,:,:]))))
C[...,n,:,:] = - mmdot(C[...,n,:,:], V[...,n+1,:,:])
# Ensure symmetry by 0.5*(V+V.T)
V[...,n,:,:] = 0.5 * (V[...,n,:,:] + misc.T(V[...,n,:,:]))

return (V, C, x, ldet)
```