/usr/include/trilinos/Tsqr_Combine.hpp is in libtrilinos-tpetra-dev 12.12.1-5.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 | //@HEADER
// ************************************************************************
//
// Kokkos: Node API and Parallel Node Kernels
// Copyright (2008) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
//
// ************************************************************************
//@HEADER
/// \file Tsqr_Combine.hpp
/// \brief TSQR's six computational kernels.
///
#ifndef __TSQR_Combine_hpp
#define __TSQR_Combine_hpp
#include <Teuchos_ScalarTraits.hpp>
#include <Tsqr_ApplyType.hpp>
#include <Tsqr_CombineNative.hpp>
namespace TSQR {
/// \class Combine
/// \brief TSQR's six computational kernels
/// \author Mark Hoemmen
///
/// This class provides the six computational primitives required by
/// TSQR. The primitives are as follows, in which R, R_1, and R_2
/// each represent an n x n upper triangular matrix, A represents an
/// m x n cache block, and C_1 and C_2 represent cache blocks with
/// some number of columns p:
/// - Factor A (factor_first)
/// - Apply Q factor of A to C (apply_first)
/// - Factor [R; A] (factor_inner)
/// - Factor [R_1; R_2] (factor_pair)
/// - Apply Q factor of [R; A] to [C_1; C_2] (apply_inner)
/// - Apply Q factor of [R_1; R_2] to [C_1; C_2] (apply_pair)
///
/// \tparam Ordinal Type of indices into matrices.
/// \tparam Scalar Type of entries of matrices.
/// \tparam CombineImpl Type of a particular implementation of
/// Combine. Its public interface must contain this class'
/// interface.
///
/// All Combine methods are implemented using CombineImpl methods
/// with the same name. TSQR includes three implementations of the
/// CombineImpl interface:
/// - \c CombineDefault, which uses LAPACK and copies in and out of
/// scratch space that it owns,
/// - \c CombineNative, a C++ in-place (no scratch space) generic
/// implementation), and
/// - \c CombineFortran, a Fortran 9x in-place implementation for
/// LAPACK's four data types S, D, C, Z.
///
/// The default CombineImpl is \c CombineNative, since that should
/// work for any Ordinal and Scalar types for which LAPACK<Ordinal,
/// Scalar> and BLAS<Ordinal, Scalar> are implemented.
///
template< class Ordinal,
class Scalar,
class CombineImpl = CombineNative<Ordinal, Scalar, Teuchos::ScalarTraits<Scalar >::isComplex> >
class Combine {
public:
/// \typedef scalar_type
/// \brief Type of matrix entries.
typedef Scalar scalar_type;
/// \typedef ordinal_type
/// \brief Type of (intranode) matrix indices.
typedef Ordinal ordinal_type;
/// \typedef combine_impl_type
/// \brief Type of the implementation of Combine.
typedef CombineImpl combine_impl_type;
//! Constructor.
Combine () {}
/// Whether or not the QR factorizations computed by methods of
/// this class produce an R factor with all nonnegative diagonal
/// entries.
static bool QR_produces_R_factor_with_nonnegative_diagonal() {
return combine_impl_type::QR_produces_R_factor_with_nonnegative_diagonal();
}
/// \brief Factor the first cache block.
///
/// Compute the QR factorization of the nrows by ncols matrix A
/// (with leading dimension lda). Overwrite the upper triangle of
/// A with the resulting R factor, and the lower trapezoid of A
/// (along with the length ncols tau array) with the implicitly
/// stored Q factor.
///
/// \param nrows [in] Number of rows in A
/// \param ncols [in] Number of columns in A
/// \param A [in/out] On input: the nrows by ncols matrix (in
/// column-major order, with leading dimension lda) to factor.
/// On output: upper triangle contains the R factor, and lower
/// part contains the implicitly stored Q factor.
/// \param lda [in] Leading dimension of A
/// \param tau [out] Array of length ncols; on output, the
/// scaling factors for the Householder reflectors
/// \param work [out] Workspace array of length ncols
void
factor_first (const Ordinal nrows,
const Ordinal ncols,
Scalar A[],
const Ordinal lda,
Scalar tau[],
Scalar work[]) const
{
return impl_.factor_first (nrows, ncols, A, lda, tau, work);
}
/// \brief Apply the result of \c factor_first().
///
/// Apply the Q factor, as computed by factor_first() and stored
/// implicitly in A and tau, to the matrix C.
void
apply_first (const ApplyType& applyType,
const Ordinal nrows,
const Ordinal ncols_C,
const Ordinal ncols_A,
const Scalar A[],
const Ordinal lda,
const Scalar tau[],
Scalar C[],
const Ordinal ldc,
Scalar work[]) const
{
return impl_.apply_first (applyType, nrows, ncols_C, ncols_A,
A, lda, tau, C, ldc, work);
}
/// Apply the result of \c factor_inner().
///
/// Apply the Q factor stored in [R; A] to [C_top; C_bot]. The C
/// blocks are allowed, but not required, to have different leading
/// dimensions (ldc_top resp. ldc_bottom). R is upper triangular, so
/// we do not need it; the Householder reflectors representing the Q
/// factor are stored compactly in A (specifically, in all of A, not
/// just the lower triangle).
///
/// In the "sequential under parallel" version of TSQR, this function
/// belongs to the sequential part (i.e., operating on cache blocks on
/// a single processor).
///
/// \param apply_type [in] NoTranspose means apply Q, Transpose
/// means apply Q^T, and ConjugateTranspose means apply Q^H.
/// \param m [in] number of rows of A
/// \param ncols_C [in] number of columns of [C_top; C_bot]
/// \param ncols_Q [in] number of columns of [R; A]
/// \param A [in] m by ncols_Q matrix, in which the Householder
/// reflectors representing the Q factor are stored
/// \param lda [in] leading dimension of A
/// \param tau [in] array of length ncols_Q, storing the scaling
/// factors for the Householder reflectors representing Q
/// \param C_top [inout] ncols_Q by ncols_C matrix
/// \param ldc_top [in] leading dimension of C_top
/// \param C_bot [inout] m by ncols_C matrix
/// \param ldc_bot [in] leading dimension of C_bot
/// \param work [out] workspace array of length ncols_C
void
apply_inner (const ApplyType& apply_type,
const Ordinal m,
const Ordinal ncols_C,
const Ordinal ncols_Q,
const Scalar A[],
const Ordinal lda,
const Scalar tau[],
Scalar C_top[],
const Ordinal ldc_top,
Scalar C_bot[],
const Ordinal ldc_bot,
Scalar work[]) const
{
impl_.apply_inner (apply_type, m, ncols_C, ncols_Q,
A, lda, tau,
C_top, ldc_top, C_bot, ldc_bot, work);
}
/// \brief Factor [R; A] for square upper triangular R and cache block A.
///
/// Perform one "inner" QR factorization step of sequential / parallel
/// TSQR. (In either case, only one processor calls this routine.)
///
/// In the "sequential under parallel" version of TSQR, this function
/// belongs to the sequential part (i.e., operating on cache blocks on
/// a single processor). Only the first cache block $A_0$ is factored
/// as $Q_0 R_0 = A_0$ (see tsqr_factor_first); subsequent cache blocks
/// $A_k$ are factored using this routine, which combines them with
/// $R_{k-1}$.
///
/// Here is the matrix to factor:
/// \[
/// \begin{pmatrix}
/// R_{k-1} \\ % $A_{k-1}$ is $m_{k-1} \times n$ with $m_{k-1} \geq n$
/// A_k \\ % $m_k \times n$ with $m_k \geq n$
/// \end{pmatrix}
/// \]
///
/// Since $R_{k-1}$ is n by n upper triangular, we can store the
/// Householder reflectors (representing the Q factor of $[R_{k-1};
/// A_k]$) entirely in $A_k$ (specifically, in all of $A_k$, not just
/// below the diagonal).
///
/// \param m [in] Number of rows in the "bottom" block to factor.
/// The number of rows in the top block doesn't matter, given the
/// assumptions above, as long as $m_{k-1} \geq n$.
/// \param n [in] Number of columns (same in both blocks)
/// \param R [inout] "Top" upper triangular n by n block $R_{k-1}$.
/// Overwritten with the new R factor $R_k$ of $[R_{k-1}; A_k]$.
/// \param ldr [in] Leading dimension of R
/// \param A [inout] "Bottom" dense m by n block $A_k$. Overwritten
/// with the Householder reflectors representing the Q factor of
/// $[R_{k-1}; A_k]$.
/// \param tau [out] Scaling factors of the Householder reflectors.
/// Corresponds to the TAU output of LAPACK's _GEQRF.
/// \param work [out] Workspace (length >= n; don't need lwork or
/// workspace query)
void
factor_inner (const Ordinal m,
const Ordinal n,
Scalar R[],
const Ordinal ldr,
Scalar A[],
const Ordinal lda,
Scalar tau[],
Scalar work[]) const
{
impl_.factor_inner (m, n, R, ldr, A, lda, tau, work);
}
/// \brief Factor the pair of square upper triangular matrices [R_top; R_bot].
///
/// Store the resulting R factor in R_top, and the resulting
/// Householder reflectors implicitly in R_bot and tau.
///
/// \param n [in] Number of rows and columns of each of R_top and R_bot
/// \param R_top [inout] n by n upper triangular matrix
/// \param ldr_top [in] Leading dimension of R_top
/// \param R_bot [inout] n by n upper triangular matrix
/// \param ldr_bot [in] Leading dimension of R_bot
/// \param tau [out] Scaling factors for Householder reflectors
/// \param work [out] Workspace array (of length >= n)
///
void
factor_pair (const Ordinal n,
Scalar R_top[],
const Ordinal ldr_top,
Scalar R_bot[],
const Ordinal ldr_bot,
Scalar tau[],
Scalar work[]) const
{
impl_.factor_pair (n, R_top, ldr_top, R_bot, ldr_bot, tau, work);
}
/// \brief Apply the result of \c factor_pair().
///
/// Apply Q factor (or Q^T or Q^H) of the 2*ncols_Q by ncols_Q
/// matrix [R_top; R_bot] (stored in R_bot and tau) to the
/// 2*ncols_Q by ncols_C matrix [C_top; C_bot]. The two blocks
/// C_top and C_bot may have different leading dimensions (ldc_top
/// resp. ldc_bot).
///
/// \param apply_type [in] NoTranspose means apply Q, Transpose
/// means apply Q^T, and ConjugateTranspose means apply Q^H.
void
apply_pair (const ApplyType& apply_type,
const Ordinal ncols_C,
const Ordinal ncols_Q,
const Scalar R_bot[],
const Ordinal ldr_bot,
const Scalar tau[],
Scalar C_top[],
const Ordinal ldc_top,
Scalar C_bot[],
const Ordinal ldc_bot,
Scalar work[]) const
{
impl_.apply_pair (apply_type, ncols_C, ncols_Q,
R_bot, ldr_bot, tau,
C_top, ldc_top, C_bot, ldc_bot, work);
}
private:
//! The implementation of Combine.
combine_impl_type impl_;
};
} // namespace TSQR
#endif // __TSQR_Combine_hpp
|