diff --git a/.github/workflows/dbg_smoke.yml b/.github/workflows/dbg_smoke.yml
index 8fb2795ce..1f956c072 100644
--- a/.github/workflows/dbg_smoke.yml
+++ b/.github/workflows/dbg_smoke.yml
@@ -17,7 +17,7 @@ jobs:
run: sudo apt-get install -y libnuma-dev
- name: Configure
- run: mkdir build && cd build && ../bootstrap.sh --prefix=../install --debug-build
+ run: mkdir build && cd build && ../bootstrap.sh --prefix=../install --debug-build --no-dense
- name: Build
working-directory: ${{github.workspace}}/build
diff --git a/.gitignore b/.gitignore
index bbb0d673e..a0eca11b6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,5 @@ paths.mk
[Bb]uild*/
[Oo]bj*/
[Ii]nstall*/
-cmake-build-*/
\ No newline at end of file
+cmake-build-*/
+.vscode/
\ No newline at end of file
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 368bc4c26..d1c8c08e7 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -115,7 +115,7 @@ workflow:
# before_script:
# - yum -y update && yum -y groupinstall "Development Tools" && yum -y install make autoconf cmake numactl-devel
# script:
-# - rm -rf build install && mkdir -p install build && cd ./build && ../bootstrap.sh --prefix=../install && make -j$(nproc) && make -j$(nproc) build_tests_all
+# - rm -rf build install && mkdir -p install build && cd ./build && ../bootstrap.sh --prefix=../install --no-dense && make -j$(nproc) && make -j$(nproc) build_tests_all
# - *strip_symbols
# artifacts:
# paths:
@@ -131,7 +131,7 @@ workflow:
# before_script:
# - yum -y update && yum -y groupinstall "Development Tools" && yum -y install make autoconf cmake numactl-devel
# script:
-# - mkdir -p install build && cd ./build && ../bootstrap.sh --prefix=../install --debug-build && make -j$(nproc) && make -j$(nproc) build_tests_all
+# - mkdir -p install build && cd ./build && ../bootstrap.sh --prefix=../install --debug-build --no-dense && make -j$(nproc) && make -j$(nproc) build_tests_all
# rules:
# - if: $EXTRA_TESTS_ENABLED == "yes"
@@ -187,7 +187,7 @@ workflow:
build_test:
script:
- - mkdir -p install build && cd ./build && ../bootstrap.sh --prefix=../install --with-datasets=${ALP_DATASETS}
+ - mkdir -p install build && cd ./build && ../bootstrap.sh --prefix=../install --with-datasets=${ALP_DATASETS} --no-dense
&& make -j$(nproc) build_tests_all
- *strip_symbols
artifacts:
@@ -238,7 +238,7 @@ test_installation:
build_test_buildtype_debug:
script:
- - mkdir -p install build && cd ./build && ../bootstrap.sh --prefix=../install --with-datasets=${ALP_DATASETS}
+ - mkdir -p install build && cd ./build && ../bootstrap.sh --prefix=../install --with-datasets=${ALP_DATASETS} --no-dense
--debug-build && make -j$(nproc) && make -j$(nproc) build_tests_all
- *strip_symbols
artifacts:
@@ -272,7 +272,7 @@ gitleaks:
# factored out command to download the datasets, cmake, and build in non-debug mode
.setup_and_build_ndebug_slurm: &setup_and_build_ndebug_slurm
- mkdir -p install build && cd ./build
- - ../bootstrap.sh --prefix=../install --with-datasets=${SLURM_DATASETS_DIR_PATH} --no-hyperdags
+ - ../bootstrap.sh --prefix=../install --with-datasets=${SLURM_DATASETS_DIR_PATH} --no-hyperdags --no-dense
- make -j$(nproc)
tests_performance_slurm:
@@ -318,7 +318,7 @@ build_test_lpf:
- if: $LPF_TESTS_ENABLED == "yes"
script:
# build only LPF-related tests
- - mkdir -p install build && cd ./build && ../bootstrap.sh --with-lpf=${LPF_PATH} --no-nonblocking --no-reference
+ - mkdir -p install build && cd ./build && ../bootstrap.sh --with-lpf=${LPF_PATH} --no-nonblocking --no-reference --no-dense
--no-hyperdags --prefix=../install --with-datasets=${ALP_DATASETS} && make -j$(nproc) build_tests_all
- *strip_symbols
artifacts:
@@ -389,7 +389,7 @@ build_test_gcc_versions:
# VER: [11,12,13,14]
script:
- mkdir -p install build && cd ./build &&
- CXX=${CXX_COMPILER}-${VER} CC=${CC_COMPILER}-${VER} ../bootstrap.sh
+ CXX=${CXX_COMPILER}-${VER} CC=${CC_COMPILER}-${VER} ../bootstrap.sh --no-dense
--prefix=../install --with-datasets=${ALP_DATASETS}
--with-lpf=${LPF_BASE_PATH}/build_mpich_${CC_COMPILER}_${VER}/install &&
make -j$(nproc) build_tests_all
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 02c49eb37..c667372e2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -37,7 +37,7 @@ project( GraphBLAS
DESCRIPTION "The ultimate engine for sparse computation"
LANGUAGES CXX C
)
-set( CMAKE_CXX_STANDARD 11 )
+set( CMAKE_CXX_STANDARD 14 )
set( CMAKE_CXX_STANDARD_REQUIRED ON )
# install within the build directory by default (NOT to /usr/local or the likes)
@@ -50,6 +50,8 @@ endif()
### CONFIGURATION OPTIONS
# to choose backends and dependencies
option( WITH_REFERENCE_BACKEND "With Reference backend" ON )
+option( WITH_ALP_REFERENCE_BACKEND "With Reference Dense backend" ON )
+option( WITH_ALP_DISPATCH_BACKEND "With Dispatch Dense backend" OFF )
option( WITH_OMP_BACKEND "With OMP backend" ON )
option( WITH_HYPERDAGS_BACKEND "With Hyperdags backend" ON )
if( WITH_HYPERDAGS_BACKEND )
@@ -58,6 +60,7 @@ if( WITH_HYPERDAGS_BACKEND )
endif()
endif()
option( WITH_NONBLOCKING_BACKEND "With Nonblocking backend" ON )
+option( WITH_ALP_OMP_BACKEND "With OMP Dense backend" OFF )
option( WITH_NUMA "With NUMA support" ON )
option( LPF_INSTALL_PATH "Path to the LPF tools for the BSP1D and Hybrid backends" OFF )
# the following options depend on LPF_INSTALL_PATH being set
@@ -132,7 +135,11 @@ if( NOT WITH_REFERENCE_BACKEND AND
NOT WITH_NONBLOCKING_BACKEND AND
NOT WITH_BSP1D_BACKEND AND
NOT WITH_HYBRID_BACKEND AND
- NOT WITH_HYPERDAGS_BACKEND )
+ NOT WITH_HYPERDAGS_BACKEND AND
+ NOT WITH_ALP_REFERENCE_BACKEND AND
+ NOT WITH_ALP_DISPATCH_BACKEND AND
+ NOT WITH_ALP_OMP_BACKEND )
+ # at least one backend should be enabled
message( FATAL_ERROR "At least one backend should be enabled")
endif()
@@ -156,6 +163,15 @@ endif()
if( WITH_HYBRID_BACKEND )
list( APPEND AVAILABLE_TEST_BACKENDS "hybrid" )
endif()
+if( WITH_ALP_REFERENCE_BACKEND )
+ list( APPEND AVAILABLE_TEST_BACKENDS "alp_reference" )
+endif()
+if( WITH_ALP_DISPATCH_BACKEND )
+ list( APPEND AVAILABLE_TEST_BACKENDS "alp_dispatch" )
+endif()
+if( WITH_ALP_OMP_BACKEND )
+ list( APPEND AVAILABLE_TEST_BACKENDS "alp_omp" )
+endif()
# Enable backends based on features
if( ENABLE_SOLVER_LIB )
@@ -267,7 +283,6 @@ if( WITH_BSP1D_BACKEND OR WITH_HYBRID_BACKEND )
find_package( LPF REQUIRED )
endif( )
-
### SETTINGS FOR COMPILATION
set( TEST_CATEGORIES "unit" "smoke" "performance" )
@@ -280,6 +295,10 @@ set( TEST_CATEGORIES "unit" "smoke" "performance" )
# scope and propagate down to the other files
include( AddGRBVars )
+if( WITH_ALP_DISPATCH_BACKEND )
+ include( Blas )
+endif( )
+
# here, add information for wrappers generated during installation
include( AddGRBInstall )
@@ -303,8 +322,11 @@ include( Transition )
# by default no headers are built
set( WITH_REFERENCE_BACKEND_HEADERS OFF )
+set( WITH_ALP_REFERENCE_BACKEND_HEADERS OFF )
+set( WITH_ALP_DISPATCH_BACKEND_HEADERS OFF )
set( WITH_OMP_BACKEND_HEADERS OFF )
set( WITH_HYPERDAGS_BACKEND_HEADERS OFF )
+set( WITH_ALP_OMP_BACKEND_HEADERS OFF )
# activate headers based on requested backends
if( WITH_REFERENCE_BACKEND OR WITH_BSP1D_BACKEND OR WITH_NONBLOCKING_BACKEND )
@@ -321,6 +343,18 @@ if( WITH_OMP_BACKEND OR WITH_HYBRID_BACKEND )
set( WITH_OMP_BACKEND_HEADERS ON )
endif()
+if( WITH_ALP_REFERENCE_BACKEND )
+ set( WITH_ALP_REFERENCE_BACKEND_HEADERS ON )
+endif()
+
+if( WITH_ALP_DISPATCH_BACKEND )
+ set( WITH_ALP_DISPATCH_BACKEND_HEADERS ON )
+endif()
+
+if( WITH_ALP_OMP_BACKEND )
+ set( WITH_ALP_OMP_BACKEND_HEADERS ON )
+endif()
+
add_subdirectory( include )
### BACKEND IMPLEMENTATIONS
diff --git a/README.md b/README.md
index 54c891dd4..b39d14b7c 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,9 @@ See the License for the specific language governing permissions and
limitations under the License.
+# ALP/Dense Testing Guide
+
+Please visit [The ALP/Dense Tests](alpdense.md) for the latest information about testing the prototype ALP/Dense interface and backends.
This distribution contains the C++ Algebraic Programming (ALP) framework, and
provides the ALP/GraphBLAS, ALP/Pregel, and Sparse BLAS programming interfaces.
diff --git a/alpdense.md b/alpdense.md
new file mode 100644
index 000000000..7f109436c
--- /dev/null
+++ b/alpdense.md
@@ -0,0 +1,314 @@
+# Introduction
+This file is intended to provide instructions for:
+- Running smoke, unoptimized performance tests for the ALP/Dense sequential reference backend (aka alp_reference);
+- Running optimized performance tests of the ALP/Dense sequential reference backend with dispatch to BLAS (aka alp_dispatch);
+- Running optimized performance tests of the ALP/Dense shared memory backend with dispatch to BLAS (aka alp_omp).
+
+# Performance Tests
+
+These tests have been executed:
+- On a Kunpeng 920 node using 1 core for the sequential reference and alp_dispatch tests and 64 cores for the alp_omp tests;
+- Compiling with gcc 9.4.0;
+- Linking against KunpengBLAS from the Kunpeng BoostKit 22.0.RC1 and the netlib LAPACK linking to the same BLAS library.
+- All tests report runtime in milliseconds after the _time (ms, ...)_ text lines printed on screen.
+
+In our evaluation we extracted the _Kunpeng BoostKit 22.0.RC1_ in a `BLAS_ROOT` folder (the `usr/local/kml` directory extracted from the `boostkit-kml-1.6.0-1.aarch64.rpm` package). `BLAS_ROOT` should contain the `include/kblas.h` header file and the `lib/kblas/{locking, nolocking, omp, pthread}/libkblas.so` library.
+
+If no system LAPACK library can be found by the compiler, `LAPACK_LIB` (containing the `liblapack.{a,so}` library) and `LAPACK_INCLUDE` (containing the `lapacke.h` header file) have to be appropriately set and provided to cmake, for example exporting them as follows:
+
+```
+# The root folder where this branch is cloned.
+export ALP_SOURCE="$(realpath ../)"
+# The build folder from which running these steps.
+export ALP_BUILD="$(pwd)"
+# The KML installation folder.
+# For example, the "usr/local/kml" directory extracted from the "boostkit-kml-1.6.0-1.aarch64.rpm"
+#export BLAS_ROOT="/path/to/kunpengblas/boostkit-kml-1.6.0.aarch64/usr/local/kml"
+# The lib folder of the LAPACK library.
+#export LAPACK_LIB="/path/to/lapack/netlib/build/lib"
+# The include folder of the LAPACK library.
+# Must include the C/C++ LAPACKE interface.
+#export LAPACK_INCLUDE="/path/to/lapack/netlib/lapack-3.9.1/LAPACKE/include/"
+
+if [ -z ${BLAS_ROOT+x} ] || [ -z ${LAPACK_LIB+x} ] || [ -z ${LAPACK_INCLUDE+x} ]; then
+ echo "Please define BLAS_ROOT, LAPACK_LIB, and LAPACK_INCLUDE variables."
+fi
+```
+
+In particular, we assume the availability of the C/C++ LAPACKE interface and, for all tests below, we assume no system libraries are available.
+
+***Assuming this branch is cloned in the `ALP_SOURCE` folder, all instructions provided below should be run from a `$ALP_SOURCE/build` folder.***
+
+An analogous [script-like](alpdense.sh) version of this page is available in the ALP root directory of this branch. You may decide to run it directly (**note:** always making sure to customize the export commands above to your environment first) as follows:
+
+```
+bash ../alpdense.sh
+```
+
+The script also logs the output of each test group below into a separate file in `$ALP_BUILD/logs`, i.e.,
+- Smoke tests:
+ - `alp_smoketests.log` (ALP smoketests - reference backend, unoptimized)
+ - `lapack_smoketests.log` (LAPACK smoketests - sequential KBLAS)
+- Performance tests:
+ - `lapack_doptrf_seq.log` (LAPACK `dpotrf` - sequential KBLAS)
+ - `alp_dpotrf_seq.log` (ALP `dpotrf` - dispatch backend, sequential KBLAS)
+ - `lapack_dpotrf_omp.log` (LAPACK `dpotrf` - shared-memory KBLAS)
+ - `alp_dpotrf_omp.log` (ALP `dpotrf` - dispatch backend, shared-memory KBLAS)
+ - `kblas_mxm_omp.log` (KunpengBLAS `mxm` - shared memory)
+ - `alp_smoketests.log` (ALP `mxm` - omp+dispatch backends, shared-memory KBLAS)
+
+The rest of this page describes each step of the script above.
+
+# Source Code Location
+
+Assuming this branch is cloned in the `ALP_SOURCE` folder, all ALP/Dense include files are located in the `$ALP_SOURCE/include/alp` folder:
+- In particular, all the pre-implemented algorithms are located in `$ALP_SOURCE/include/alp/algorithms`
+- The reference, dispatch, and omp backends are located in `$ALP_SOURCE/include/alp/reference`, `$ALP_SOURCE/include/alp/dispatch`, and `$ALP_SOURCE/include/alp/omp`, respectively.
+
+All tests discussed below are collected in the `$ALP_SOURCE/tests/smoke` and `$ALP_SOURCE/tests/performance` folders. The folder `$ALP_SOURCE/tests/unit` contains additional unit tests not discussed on this page.
+
+# Dependencies
+
+For all tests below, the standard ALP dependencies are required:
+- gfortran: -lgfortran
+- LibNUMA: -lnuma
+- Standard math library: -lm
+- POSIX threads: -lpthread
+- OpenMP: -fopenmp in the case of GCC
+
+# Sequential Smoke Tests (Functional, Unoptimized)
+
+We collect the following smoke tests associated with the ALP/Dense reference backend:
+- Basic targets:
+ - General matrix-matrix multiplication ([source](tests/smoke/alp_gemm.cpp))
+ - Householder tridiagonalization of a real symmetric/complex Hermitian matrix ([source](tests/smoke/alp_zhetrd.cpp))
+ - Divide and conquer tridiagonal eigensolver for tridiagonal, real symmetric matrices ([source](tests/smoke/alp_stedc.cpp))
+ - Eigensolver for real symmetric matrices ([source](tests/smoke/alp_zheevd.cpp))
+ - Householder QR decomposition of a real/complex general matrix ([source](tests/smoke/alp_zgeqrf.cpp))
+- Challenge targets:
+ - Triangular linear system solve using backsubstitution of upper tridiagonal, real/complex matrix ([source](tests/smoke/alp_backsubstitution.cpp))
+ - Triangular linear system solve using forwardsubstitution of lower tridiagonal, real/complex matrix ([source](tests/smoke/alp_forwardsubstitution.cpp))
+ - Cholesky decomposition of a symmetric/Hermitian positive definite matrix ([source](tests/smoke/alp_cholesky.cpp))
+ - Householder LU decomposition of a real/complex general matrices ([source](tests/smoke/alp_zgetrf.cpp))
+ - Inverse of a symmetric/Hermitian positive definite matrix ([source](tests/smoke/alp_potri.cpp))
+ - Singular value decomposition of a real/complex general matrix ([source](tests/smoke/alp_zgesvd.cpp))
+
+These tests are collected and run as ALP smoketests.
+From `$ALP_SOURCE/build` run:
+
+```
+cmake -DWITH_ALP_REFERENCE_BACKEND=ON -DCMAKE_INSTALL_PREFIX=./install $ALP_SOURCE || ( echo "test failed" && exit 1 )
+make install -j$(nproc) || ( echo "test failed" && exit 1 )
+SMOKE_PRINT_TIME=ON make smoketests_alp -j$(nproc)
+```
+
+**Note:** The variable `SMOKE_PRINT_TIME=ON` is used to print timing information of each test to screen. Set it to `OFF` or remove it from the command if this action is not desired.
+
+If the tests run correctly, for each of them you should see an output similar to the following:
+
+```
+****************************************************************************************
+ FUNCTIONAL PERFORMANCE DESCRIPTION
+----------------------------------------------------------------------------------------
+
+>>> [x] [ ] Tests Cholesky decomposition for a random
+ symmetric positive definite matrix (100x100).
+Timing of blocked inplace version with bs = 64.
+ time (ms, total) = 72.1747
+ time (ms, per repeat) = 3.60873
+Test OK
+
+```
+
+To compare with LAPACK+KunpengBLAS (not ALP code) you may run the following:
+
+```
+KBLAS_LIB=$BLAS_ROOT/lib/kblas/locking
+USECASES=("dstedc" "dsyevd" "dsytrd" "zhetrd" "dgeqrf" "dgesvd" "dgetrf" "dpotri")
+
+for USECASE in "${USECASES[@]}"
+do
+ install/bin/grbcxx -o ${USECASE}_lapack_reference.exe $ALP_SOURCE/tests/performance/lapack_${USECASE}.cpp $LAPACK_LIB/liblapack.a $KBLAS_LIB/libkblas.so -Wl,-rpath $KBLAS_LIB -I$LAPACK_INCLUDE -lgfortran || ( echo "Compiling ${USECASE} failed" && exit 1 )
+done
+
+for USECASE in "${USECASES[@]}"
+do
+ ./${USECASE}_lapack_reference.exe -n 100 -repeat 20 || ( echo "test ${USECASE} failed" && exit 1 )
+done
+```
+
+# Sequential Cholesky Decomposition Tests (Dispatch, Optimized)
+
+Here we compare our ALP Cholesky implementation, based on the alp_dispatch backend, against the `potrf` LAPACK functionality.
+
+From the `$ALP_SOURCE/build` folder run the following commands:
+
+```
+cmake -DKBLAS_ROOT="$BLAS_ROOT" -DWITH_ALP_DISPATCH_BACKEND=ON -DCMAKE_INSTALL_PREFIX=./install $ALP_SOURCE || ( echo "test failed" && exit 1 )
+make install -j$(nproc) || ( echo "test failed" && exit 1 )
+```
+
+## LAPACK-Based Test (Sequential BLAS)
+
+To compile and run the LAPACK-based Cholesky test (not ALP code) run the following commands:
+```
+install/bin/grbcxx -b alp_dispatch -o cholesky_lapack_reference.exe $ALP_SOURCE/tests/performance/lapack_cholesky.cpp $LAPACK_LIB/liblapack.a -I$LAPACK_INCLUDE -lgfortran || ( echo "test failed" && exit 1 )
+for MSIZE in {400..4000..100}
+do
+ ./cholesky_lapack_reference.exe -n ${MSIZE} -repeat 10 || ( echo "test failed" && exit 1 )
+done
+```
+
+If the commands run correctly the output on screen should look like the following:
+
+```
+Testing dpotrf_ for U^T * U = S, with S SPD of size ( 1024 x 1024 )
+Test repeated 10 times.
+ time (ms, total) = 433.652
+ time (ms, per repeat) = 43.3652
+Tests OK
+```
+
+In our tests, we executed `./cholesky_lapack_reference.exe` with matrix sizes (`-n` flag) in the range [400, 4000] in steps of 100.
+
+## ALP-Based Test (Dispatch Sequential Building Blocks to Optimized BLAS)
+
+Some facts about this test:
+- The algorithm is a blocked variant of Cholesky with block size BS = 64 (as done in LAPACK).
+- It recursively requires an unblocked version of the same algorithm (of size BSxBS) which does not dispatch to LAPACK.
+- All BLAS functions needed by the algorithm are dispatched to the external BLAS library. In particular, as POC of what ALP could offer in terms of performance if its primitives could be efficiently generated/optimized (e.g., via our envisioned MLIR-based backend for delayed compilation), it dispatches the triangular solve and the fused `foldl`+`mxm` operations.
+
+```
+make test_alp_cholesky_perf_alp_dispatch -j$(nproc) || ( echo "test failed" && exit 1 )
+for MSIZE in {400..4000..100}
+do
+ tests/performance/alp_cholesky_perf_alp_dispatch -n ${MSIZE} -repeat 10 || ( echo "test failed" && exit 1 )
+done
+```
+
+If the commands run correctly the output on screen should look like the following:
+
+```
+Testing Cholesky decomposition U^T * U = S, with S SPD of size ( 1024 x 1024 )
+Test repeated 10 times.
+ time (ms, total) = 463.652
+ time (ms, per repeat) = 46.3652
+Tests OK
+```
+
+As for the LAPACK-based test, we executed `tests/performance/alp_cholesky_perf_alp_dispatch` with matrix sizes (`-n` flag) in the range [400, 4000] in steps of 100.
+
+**Note:** A consistent test should use the same BLAS in LAPACK-based as well as in the ALP-based tests.
+
+## Cholesky Decomposition with Shmem BLAS
+
+An analogous experiment can be conducted using the shared-memory BLAS library in place of the sequential one as follows (the following block runs both the LAPACK and the ALP tests):
+
+```
+subbuild="build_potrf_with_omp_blas"
+rm -rf $subbuild && mkdir $subbuild && cd $subbuild
+cmake -DKBLAS_ROOT="$BLAS_ROOT" -DKBLAS_IMPL=omp -DWITH_ALP_OMP_BACKEND=ON -DWITH_ALP_DISPATCH_BACKEND=ON -DCMAKE_INSTALL_PREFIX=./install $ALP_SOURCE || ( echo "test failed" && exit 1 )
+make install -j$(nproc) || ( echo "test failed" && exit 1 )
+
+install/bin/grbcxx -b alp_dispatch -o cholesky_lapack_omp.exe $ALP_SOURCE/tests/performance/lapack_cholesky.cpp $LAPACK_LIB/liblapack.a -I$LAPACK_INCLUDE -lgfortran || ( echo "test failed" && exit 1 )
+for NT in 1 64 96
+do
+ echo "#####################################################################"
+ echo " Testing potrf: LAPACK + KunpengBLAS (omp) with OMP_NUM_THREADS=${NT}"
+ echo "#####################################################################"
+ for MSIZE in {400..4000..100}
+ do
+ OMP_NUM_THREADS=${NT} ./cholesky_lapack_omp.exe -n ${MSIZE} -repeat 10 || ( echo "test failed" && exit 1 )
+ done
+ echo " Tests completed."
+ echo "#####################################################################"
+done
+
+make test_alp_cholesky_perf_alp_dispatch -j$(nproc) || ( echo "test failed" && exit 1 )
+for NT in 1 64 96
+do
+ echo "##########################################################################"
+ echo "Testing potrf: Testing ALP + KunpengBLAS (omp) with OMP_NUM_THREADS=${NT}"
+ echo "##########################################################################"
+ for MSIZE in {400..4000..100}
+ do
+ OMP_NUM_THREADS=${NT} tests/performance/alp_cholesky_perf_alp_dispatch -n ${MSIZE} -repeat 10 || ( echo "test failed" && exit 1 )
+ done
+ echo " Tests completed."
+ echo "##########################################################################"
+done
+cd $ALP_BUILD
+
+```
+
+# Shared-Memory Parallel `mxm` Tests (Optimized)
+
+Here we compare our ALP shared memory backend (alp_omp) `mxm` implementation against the BLAS's `gemm` functionality.
+`mxm` is an in-place ALP primitive that computes C = C + A*B, with matrices of conforming sizes.
+
+Our shared-memory backend implementation currently supports only square thread grids (although the methodology is not limited to that in general). For this reason, in the tests below we run both LAPACK and ALP using 64 threads. To ensure a fair comparison, we link with the `omp` version of KunpengBLAS.
+
+You can compile with the `omp` version of KunpengBLAS by additionally providing the `-DKBLAS_IMPL=omp` flag when calling cmake. However, this should be compiled in a different directory from the other BLAS-based builds, as follows:
+```
+subbuild="build_mxm_with_omp_blas"
+rm -rf $subbuild && mkdir $subbuild && cd $subbuild
+cmake -DKBLAS_ROOT="$BLAS_ROOT" -DKBLAS_IMPL=omp -DWITH_ALP_OMP_BACKEND=ON -DWITH_ALP_DISPATCH_BACKEND=ON -DCMAKE_INSTALL_PREFIX=./install $ALP_SOURCE || ( echo "test failed" && exit 1 )
+make install -j$(nproc) || ( echo "test failed" && exit 1 )
+```
+
+## `gemm`-Based BLAS Test.
+
+From `$subbuild` run:
+```
+install/bin/grbcxx -b alp_dispatch -o blas_mxm.exe $ALP_SOURCE/tests/performance/blas_mxm.cpp -lgfortran || ( echo "test failed" && exit 1 )
+for MSIZE in {1024..10240..1024}
+do
+ OMP_NUM_THREADS=64 ./blas_mxm.exe -n ${MSIZE} -repeat 10 || ( echo "test failed" && exit 1 )
+done
+cd $ALP_BUILD
+```
+
+If the commands run correctly the output on screen should look like the following:
+
+```
+Testing cblas_dgemm for C(1024 x 1024) += A(1024 x 1024) x B(1024 x 1024) 10 times.
+ time (ms, total) = 116.494
+ time (ms, per repeat) = 11.6494
+Tests OK
+```
+
+In our tests, we executed `./blas_mxm.exe` with matrix sizes (`-n` flag) in the range [1024:1024:10240].
+
+## ALP-Based Test (Dispatch Sequential Building Blocks to Optimized BLAS).
+
+Some facts about this test:
+- The ALP `mxm` shared memory implementation is based on a [2.5D matrix multiplication algorithm](https://netlib.org/lapack/lawnspdf/lawn248.pdf);
+- In this test we execute with a 3D thread grid of size 4x4x4;
+- We set `OMP_NUM_THREADS=64` threads and fix `GOMP_CPU_AFFINITY="0-15 24-39 48-63 72-87"` to reflect the cores and NUMA topology of the node;
+- The algorithm is allocating memory using a 2D block-cyclic layout with blocks of size 128x128.
+- Each sequential block-level `mxm` (128x128x128) is dispatched to the selected BLAS library.
+
+From `$ALP_SOURCE/build` run:
+
+```
+subbuild="build_mxm_with_alp_omp"
+rm -rf $subbuild && mkdir $subbuild && cd $subbuild
+cmake -DKBLAS_ROOT="$BLAS_ROOT" -DWITH_ALP_DISPATCH_BACKEND=ON -DWITH_ALP_OMP_BACKEND=ON -DCMAKE_INSTALL_PREFIX=./install $ALP_SOURCE || ( echo "test failed" && exit 1 )
+make test_alp_mxm_perf_alp_omp -j$(nproc) || ( echo "test failed" && exit 1 )
+for MSIZE in {1024..10240..1024}
+do
+ GOMP_CPU_AFFINITY="0-15 24-39 48-63 72-87" OMP_NUM_THREADS=64 tests/performance/alp_mxm_perf_alp_omp -n ${MSIZE} -repeat 10 || ( echo "test failed" && exit 1 )
+done
+cd $ALP_BUILD
+```
+
+If the commands run correctly the output on screen should look like the following:
+
+```
+Testing C(1024 x 1024) += A(1024 x 1024) x B(1024 x 1024) 10 times.
+ time (ms, total) = 69.7239
+ time (ms, per repeat) = 6.97239
+Tests OK
+```
+
+As for the gemm-based test, we executed `tests/performance/alp_mxm_perf_alp_omp` with matrix sizes (`-n` flag) in the range [1024:1024:10240].
diff --git a/alpdense.sh b/alpdense.sh
new file mode 100644
index 000000000..b817853d9
--- /dev/null
+++ b/alpdense.sh
@@ -0,0 +1,258 @@
+# This file is intended to provide instructions for:
+# Running smoke tests for the ALP/Dense reference backend (aka alp_reference);
+# Running performance tests of the ALP/Dense reference backend with dispatch to BLAS (aka alp_dispatch);
+# Running performance tests of the ALP/Dense shared memory backend with dispatch to BLAS (aka alp_omp).
+
+# For all tests below standard ALP dependencies are required:
+# gfortran: -lgfortran
+# LibNUMA: -lnuma
+# Standard math library: -lm
+# POSIX threads: -lpthread
+# OpenMP: -fopenmp in the case of GCC
+
+# Before running please export:
+
+# The root folder where this branch is cloned.
+export ALP_SOURCE="$(realpath ../)"
+
+# The build folder from which running these steps.
+export ALP_BUILD="$(pwd)"
+
+# The KML installation folder.
+# For example, the "usr/local/kml" directory extracted from the "boostkit-kml-1.6.0-1.aarch64.rpm"
+#export BLAS_ROOT="/path/to/kunpengblas/boostkit-kml-1.6.0.aarch64/usr/local/kml"
+
+# The lib folder of the LAPACK library.
+#export LAPACK_LIB="/path/to/lapack/netlib/build/lib"
+
+# The include folder of the LAPACK library.
+# Must include the C/C++ LAPACKE interface.
+#export LAPACK_INCLUDE="/path/to/lapack/netlib/lapack-3.9.1/LAPACKE/include/"
+
+if [ -z ${BLAS_ROOT+x} ] || [ -z ${LAPACK_LIB+x} ] || [ -z ${LAPACK_INCLUDE+x} ]; then
+ echo "Please define BLAS_ROOT, LAPACK_LIB, and LAPACK_INCLUDE variables."
+ exit 1
+fi
+
+####################
+####################
+# Smoke tests
+####################
+####################
+
+# We collect the following smoke tests associated with the ALP/Dense reference backend:
+# (Basic targets)
+# General matrix-matrix multiplication (source: tests/smoke/alp_gemm.cpp)
+# Householder tridiagonalization of a real symmetric/complex Hermitian matrix (source: tests/smoke/alp_zhetrd.cpp)
+# Divide and conquer tridiagonal eigensolver for tridiagonal, real symmetric matrices (source: tests/smoke/alp_dstedc.cpp)
+# Eigensolver for real symmetric matrices (source: tests/smoke/alp_syevd.cpp)
+# Householder QR decomposition of a real/complex general matrix (source: tests/smoke/alp_zgeqrf.cpp)
+
+# (Challenge targets)
+# Triangular linear system solve using backsubstitution of upper tridiagonal, real/complex matrix (source: tests/smoke/alp_backsubstitution.cpp)
+# Triangular linear system solve using forwardsubstitution of lower tridiagonal, real/complex matrix (source: tests/smoke/alp_forwardsubstitution.cpp)
+# Cholesky decomposition of a symmetric/Hermitian positive definite matrix (source: tests/smoke/alp_cholesky.cpp)
+# Householder LU decomposition of a real/complex general matrices (source: tests/smoke/alp_zgetrf.cpp)
+# Inverse of a symmetric/Hermitian positive definite matrix (source code: tests/smoke/alp_potri.cpp)
+# Singular value decomposition of a real/complex general matrix (source code: tests/smoke/alp_zgesvd.cpp)
+
+# These tests are collected and run as ALP smoketests as follows:
+
+LOGDIR=$ALP_BUILD/logs
+mkdir -p $LOGDIR
+
+cmake -DWITH_ALP_REFERENCE_BACKEND=ON -DCMAKE_INSTALL_PREFIX=./install $ALP_SOURCE || ( echo "test failed" && exit 1 )
+make install -j$(nproc) || ( echo "test failed" && exit 1 )
+SMOKE_PRINT_TIME=ON make smoketests_alp -j$(nproc) | tee $LOGDIR/alp_smoketests.log
+
+# To compile and run the LAPACK-based tests (not ALP code).
+# Here you can use gcc flags, i.e. "-L/path/tolapack/ -llapack" (or simply " -llapack" to use system installed lapack library).
+KBLAS_LIB=$BLAS_ROOT/lib/kblas/locking
+USECASES=("dstedc" "dsyevd" "dsytrd" "zhetrd" "dgeqrf" "dgesvd" "dgetrf" "dpotri")
+
+for USECASE in "${USECASES[@]}"
+do
+ install/bin/grbcxx -o ${USECASE}_lapack_reference.exe $ALP_SOURCE/tests/performance/lapack_${USECASE}.cpp $LAPACK_LIB/liblapack.a $KBLAS_LIB/libkblas.so -Wl,-rpath $KBLAS_LIB -I$LAPACK_INCLUDE -lgfortran || ( echo "Compiling ${USECASE} failed" && exit 1 )
+done
+
+LOGFILE=$LOGDIR/lapack_smoketests.log
+echo "#####################################################################"
+echo " LAPACK smoketests (seq)" | tee -a $LOGFILE
+echo "#####################################################################"
+for USECASE in "${USECASES[@]}"
+do
+ ( ./${USECASE}_lapack_reference.exe -n 100 -repeat 20 || ( echo "test ${USECASE} failed" && exit 1 ) )
+done | tee -a $LOGFILE
+
+####################
+####################
+# Performance tests
+####################
+####################
+
+# These tests have been executed:
+# On a Kunpeng 920 node with 1 core (alp_dispatch) or 64 cores (alp_omp);
+# Compiling with gcc 9.4.0;
+# Linking against KunpengBLAS (Kunpeng BoostKit 22.0.RC1) and netlib LAPACK.
+# All tests report time in milliseconds after "time (ms, ...)" text line.
+#
+# These instructions assume that you are using "Kunpeng BoostKit 22.0.RC1" extracted in a directory BLAS_ROOT
+# which should contain include/kblas.h file and the lib/kblas/ directory.
+# However, any other blas library could also be used.
+
+####################
+# Compilation and execution of the sequential Cholesky decomposition tests
+# which are testing our ALP Cholesky implementation, based on the alp_dispatch backend, against the potrf LAPACK functionality.
+####################
+
+# If no LAPACK library can be found by the compiler in system directories, LAPACK_LIB and LAPACK_INCLUDE have to be properly set and explicitly provided when calling cmake.
+# If you are using locally installed kblas, make sure to set proper BLAS_ROOT path to "kml" directory, i.e. extracted boostkit-kml-1.6.0-1.aarch64.rpm.
+cmake -DKBLAS_ROOT="$BLAS_ROOT" -DWITH_ALP_DISPATCH_BACKEND=ON -DCMAKE_INSTALL_PREFIX=./install $ALP_SOURCE || ( echo "test failed" && exit 1 )
+make install -j$(nproc) || ( echo "test failed" && exit 1 )
+
+# To compile and run the LAPACK Cholesky test (not ALP code).
+# Here you can use gcc flags, i.e. "-L/path/toib/ -llapack" (or simply " -llapack" to use system installed lapack library).
+# A consistent test should use the same BLAS in LAPACK as in the ALP-based tests.
+install/bin/grbcxx -b alp_dispatch -o cholesky_lapack_reference.exe $ALP_SOURCE/tests/performance/lapack_cholesky.cpp $LAPACK_LIB/liblapack.a -I$LAPACK_INCLUDE -lgfortran || ( echo "test failed" && exit 1 )
+
+LOGFILE=$LOGDIR/lapack_doptrf_seq.log
+echo "#####################################################################"
+echo " Testing potrf: LAPACK + KunpengBLAS (seq)" | tee -a $LOGFILE
+echo "#####################################################################"
+for MSIZE in {400..4000..100}
+do
+ ( ./cholesky_lapack_reference.exe -n ${MSIZE} -repeat 10 || ( echo "test failed" && exit 1 ) )
+done | tee -a $LOGFILE
+echo " Tests completed."
+echo "#####################################################################"
+
+# Run the Cholesky ALP dispatch sequential test.
+# Some facts about the test:
+# The algorithm is a blocked variant of Cholesky with block size BS = 64 (as done in LAPACK).
+# It recursively requires an unblocked version of the same algorithm (of size BSxBS) which does not dispatch to LAPACK.
+# All BLAS functions needed by the algorithm are dispatched to the external BLAS library.
+make test_alp_cholesky_perf_alp_dispatch -j$(nproc) || ( echo "test failed" && exit 1 )
+LOGFILE=$LOGDIR/alp_dpotrf_seq.log
+echo "#####################################################################"
+echo " Testing potrf: ALP + KunpengBLAS (seq)" | tee -a $LOGFILE
+echo "#####################################################################"
+for MSIZE in {400..4000..100}
+do
+ ( tests/performance/alp_cholesky_perf_alp_dispatch -n ${MSIZE} -repeat 10 || ( echo "test failed" && exit 1 ) )
+done | tee -a $LOGFILE
+echo " Tests completed."
+echo "#####################################################################"
+
+####################
+# Compilation and execution of the shared-memory Cholesky decomposition tests
+# which are testing our ALP Cholesky implementation, based on the alp_dispatch backend, against the potrf LAPACK functionality.
+# Differently from the sequential test we link against the shared-memory KBLAS library.
+####################
+
+# Assuming that you are currently in the "build" directory of the ALP cloned repository.
+# If no LAPACK library can be found by the compiler in system directories, LAPACK_LIB and LAPACK_INCLUDE have to be properly set and explicitly provided when calling cmake.
+# If you are using locally installed kblas, make sure to set proper BLAS_ROOT path to "kml" directory, i.e. extracted boostkit-kml-1.6.0-1.aarch64.rpm.
+
+subbuild="build_potrf_with_omp_blas"
+rm -rf $subbuild && mkdir $subbuild && cd $subbuild
+cmake -DKBLAS_ROOT="$BLAS_ROOT" -DKBLAS_IMPL=omp -DWITH_ALP_OMP_BACKEND=ON -DWITH_ALP_DISPATCH_BACKEND=ON -DCMAKE_INSTALL_PREFIX=./install $ALP_SOURCE || ( echo "test failed" && exit 1 )
+make install -j$(nproc) || ( echo "test failed" && exit 1 )
+
+# To compile and run the LAPACK Cholesky test (not ALP code).
+# Here you can use gcc flags, i.e. "-L/path/to/lib/ -llapack" (or simply " -llapack" to use system installed lapack library).
+# A consistent test should use the same BLAS in LAPACK as in the ALP-based tests.
+install/bin/grbcxx -b alp_dispatch -o cholesky_lapack_omp.exe $ALP_SOURCE/tests/performance/lapack_cholesky.cpp $LAPACK_LIB/liblapack.a -I$LAPACK_INCLUDE -lgfortran || ( echo "test failed" && exit 1 )
+
+LOGFILE=$LOGDIR/lapack_dpotrf_omp.log
+for NT in 1 64 96
+do
+ echo "#####################################################################"
+ echo " Testing potrf: LAPACK + KunpengBLAS (omp) with OMP_NUM_THREADS=${NT}"
+ echo "#####################################################################"
+ for MSIZE in {400..4000..100}
+ do
+ OMP_NUM_THREADS=${NT} ./cholesky_lapack_omp.exe -n ${MSIZE} -repeat 10 || ( echo "test failed" && exit 1 )
+ done
+ echo " Tests completed."
+ echo "#####################################################################"
+done | tee -a $LOGFILE
+
+# Run the Cholesky ALP dispatch shared-memory (omp) test.
+# Some facts about the test:
+# The algorithm is a blocked variant of Cholesky with block size BS = 64 (as done in LAPACK).
+# It recursively requires an unblocked version of the same algorithm (of size BSxBS) which does not dispatch to LAPACK.
+# All BLAS functions needed by the algorithm are dispatched to the external BLAS library.
+make test_alp_cholesky_perf_alp_dispatch -j$(nproc) || ( echo "test failed" && exit 1 )
+LOGFILE=$LOGDIR/alp_dpotrf_omp.log
+for NT in 1 64 96
+do
+ echo "##########################################################################"
+ echo "Testing potrf: Testing ALP + KunpengBLAS (omp) with OMP_NUM_THREADS=${NT}"
+ echo "##########################################################################"
+ for MSIZE in {400..4000..100}
+ do
+ OMP_NUM_THREADS=${NT} tests/performance/alp_cholesky_perf_alp_dispatch -n ${MSIZE} -repeat 10 || ( echo "test failed" && exit 1 )
+ done
+ echo " Tests completed."
+ echo "##########################################################################"
+done | tee -a $LOGFILE
+cd $ALP_BUILD
+
+####################
+# Compilation and execution of shared memory parallel mxm tests
+# which are testing our ALP shared memory backend (alp_omp) mxm implementation against the BLAS's gemm functionality.
+# mxm is an inplace, ALP primitive that computes C = C + A*B, with matrices of conforming sizes.
+####################
+
+# Our current shared memory backend implementation is not very flexible and can only use squared thread grids.
+# In the tests below we run both LAPACK and ALP using 64 threads.
+# To ensure a fair comparison, we link with the omp version of KunpengBLAS.
+#
+# You can compile with omp version of kblas library by additionally providing " -DKBLAS_IMPL=omp" flag when calling cmake.
+# However, this should be compiled in a different directory from the other blas calls, as follows:
+subbuild="build_mxm_with_omp_blas"
+rm -rf $subbuild && mkdir $subbuild && cd $subbuild
+cmake -DKBLAS_ROOT="$BLAS_ROOT" -DKBLAS_IMPL=omp -DWITH_ALP_OMP_BACKEND=ON -DWITH_ALP_DISPATCH_BACKEND=ON -DCMAKE_INSTALL_PREFIX=./install $ALP_SOURCE || ( echo "test failed" && exit 1 )
+make install -j$(nproc) || ( echo "test failed" && exit 1 )
+
+# Compile and run gemm-based BLAS test.
+install/bin/grbcxx -b alp_dispatch -o blas_mxm.exe $ALP_SOURCE/tests/performance/blas_mxm.cpp -lgfortran || ( echo "test failed" && exit 1 )
+
+LOGFILE=$LOGDIR/kblas_mxm_omp.log
+echo "##########################################################################"
+echo "Testing mxm: Testing KunpengBLAS (omp) with OMP_NUM_THREADS=64" | tee -a $LOGFILE
+echo "##########################################################################"
+for MSIZE in {1024..10240..1024}
+do
+ OMP_NUM_THREADS=64 ./blas_mxm.exe -n ${MSIZE} -repeat 10 || ( echo "test failed" && exit 1 )
+done | tee -a $LOGFILE
+echo " Tests completed."
+echo "##########################################################################"
+
+cd $ALP_BUILD
+
+# Run mxm omp test.
+# Some facts about the ALP test:
+# The ALP mxm implementation is based on a 2.5D algorithm;
+# In this test we execute with a 3D thread grid of size 4x4x4;
+# We set OMP_NUM_THREADS=64 threads and fix GOMP_CPU_AFFINITY="0-15 24-39 48-63 72-87" to reflect the NUMA domains in the node;
+# The algorithm is allocating memory using a 2D block-cyclic layout with blocks of size 128x128.
+
+subbuild="build_mxm_with_alp_omp"
+rm -rf $subbuild && mkdir $subbuild && cd $subbuild
+cmake -DKBLAS_ROOT="$BLAS_ROOT" -DWITH_ALP_DISPATCH_BACKEND=ON -DWITH_ALP_OMP_BACKEND=ON -DCMAKE_INSTALL_PREFIX=./install $ALP_SOURCE || ( echo "test failed" && exit 1 )
+make test_alp_mxm_perf_alp_omp -j$(nproc) || ( echo "test failed" && exit 1 )
+
+LOGFILE=$LOGDIR/alp_mxm_omp.log
+echo "##########################################################################"
+echo "Testing mxm: Testing KunpengBLAS (omp) with:" | tee -a $LOGFILE
+echo " OMP_NUM_THREADS=64 GOMP_CPU_AFFINITY=\"0-15 24-39 48-63 72-87\"" | tee -a $LOGFILE
+echo "##########################################################################"
+for MSIZE in {1024..10240..1024}
+do
+ GOMP_CPU_AFFINITY="0-15 24-39 48-63 72-87" OMP_NUM_THREADS=64 tests/performance/alp_mxm_perf_alp_omp -n ${MSIZE} -repeat 10 || ( echo "test failed" && exit 1 )
+done | tee -a $LOGFILE
+echo " Tests completed."
+echo "##########################################################################"
+
+cd $ALP_BUILD
diff --git a/bootstrap.sh b/bootstrap.sh
index e24d75d45..1b623b4f5 100755
--- a/bootstrap.sh
+++ b/bootstrap.sh
@@ -74,6 +74,7 @@ the location where LPF is installed"
echo " --with-banshee= - path to the the tools to compile the banshee backend"
echo " --with-snitch= - path to the tools for Snitch support within the banshee backend"
echo " --with-datasets= - path to the main testing datasets (use tools/downloadDatasets.sh to download)"
+	echo "     --no-alp-reference      - disables the dense alp_reference backend (see also --no-dense)"
echo " --no-reference - disables the reference and reference_omp backends"
echo " --no-hyperdags - disables the hyperdags backend"
echo " --with-hyperdags-using= - uses the given backend reference for HyperDAG generation"
@@ -104,6 +105,9 @@ hyperdags_using=reference
nonblocking=yes
banshee=no
lpf=no
+alp_reference=yes
+alp_dispatch=no
+alp_omp=no
show=no
FLAGS=$''
LPF_INSTALL_PATH=
@@ -163,6 +167,14 @@ or assume default paths (--with-lpf)"
--with-datasets=*)
DATASETS_PATH="${arg#--with-datasets=}"
;;
+ --no-alp-reference)
+ alp_reference=no
+ ;;
+ --no-dense)
+ alp_reference=no
+ alp_dispatch=no
+ alp_omp=no
+ ;;
--no-reference)
reference=no
;;
@@ -363,6 +375,21 @@ the current directory before invocation or confirm the deletion of its content w
if [[ "${nonblocking}" == "no" ]]; then
CMAKE_OPTS+=" -DWITH_NONBLOCKING_BACKEND=OFF"
fi
+ if [[ "${alp_reference}" == "no" ]]; then
+ CMAKE_OPTS+=" -DWITH_ALP_REFERENCE_BACKEND=OFF"
+ else
+ CMAKE_OPTS+=" -DWITH_ALP_REFERENCE_BACKEND=ON"
+ fi
+ if [[ "${alp_dispatch}" == "no" ]]; then
+ CMAKE_OPTS+=" -DWITH_ALP_DISPATCH_BACKEND=OFF"
+ else
+ CMAKE_OPTS+=" -DWITH_ALP_DISPATCH_BACKEND=ON"
+ fi
+ if [[ "${alp_omp}" == "no" ]]; then
+ CMAKE_OPTS+=" -DWITH_ALP_OMP_BACKEND=OFF"
+ else
+ CMAKE_OPTS+=" -DWITH_ALP_OMP_BACKEND=ON"
+ fi
if [[ "${lpf}" == "yes" ]]; then
CMAKE_OPTS+=" -DLPF_INSTALL_PATH='${ABSOLUTE_LPF_INSTALL_PATH}'"
fi
diff --git a/cmake/AddGRBInstall.cmake b/cmake/AddGRBInstall.cmake
index 94bd58f31..78caf2fe1 100644
--- a/cmake/AddGRBInstall.cmake
+++ b/cmake/AddGRBInstall.cmake
@@ -45,6 +45,9 @@ install( EXPORT GraphBLASTargets
set( ALP_UTILS_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}" )
set( SHMEM_BACKEND_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}/sequential" )
set( HYPERDAGS_BACKEND_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}/hyperdags" )
+set( ALP_REFERENCE_BACKEND_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}/alp/reference" )
+set( ALP_DISPATCH_BACKEND_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}/alp/dispatch" )
+set( ALP_OMP_BACKEND_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}/alp/omp" )
set( BSP1D_BACKEND_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}/spmd" )
set( HYBRID_BACKEND_INSTALL_DIR "${BINARY_LIBRARIES_INSTALL_DIR}/hybrid" )
@@ -121,6 +124,34 @@ if( WITH_REFERENCE_BACKEND )
)
endif()
+if( WITH_ALP_REFERENCE_BACKEND )
+ addBackendWrapperGenOptions( "alp_reference"
+ COMPILE_DEFINITIONS "ALP_REFERENCE_INCLUDE_DEFS" "${ALP_REFERENCE_SELECTION_DEFS}"
+ LINK_FLAGS "${ALP_REFERENCE_BACKEND_INSTALL_DIR}/lib${BACKEND_LIBRARY_OUTPUT_NAME}.a"
+ )
+endif()
+
+if( WITH_ALP_DISPATCH_BACKEND )
+ set( _blas_libraries ${BLAS_LIBRARIES} )
+ foreach( _lib ${BLAS_LIBRARIES} )
+ get_filename_component(_path ${_lib} DIRECTORY)
+ # use the -Wl,-rpath,<dir> driver form so the directory reaches the linker
+ set( _path " -Wl,-rpath,${_path}" )
+ list(APPEND _lib_lists ${_path} )
+ endforeach()
+ list( JOIN _lib_lists " " _blas_rpaths )
+ list( JOIN _blas_libraries " " _blas_link_libs )
+ set( _cxx_additional_includes " -I${INCLUDE_INSTALL_DIR}/blas_wrapper " )
+ # quote the expansion: unquoted, an empty KBLAS_INCLUDE_DIR makes if() malformed
+ if( NOT "${KBLAS_INCLUDE_DIR}" STREQUAL "" )
+ set( _cxx_additional_includes " -I${KBLAS_INCLUDE_DIR} ${_cxx_additional_includes}" )
+ endif()
+ addBackendWrapperGenOptions( "alp_dispatch"
+ COMPILE_DEFINITIONS "ALP_DISPATCH_INCLUDE_DEFS" "${ALP_DISPATCH_SELECTION_DEFS}"
+ LINK_FLAGS "${ALP_DISPATCH_BACKEND_INSTALL_DIR}/lib${BACKEND_LIBRARY_OUTPUT_NAME}.a ${_blas_link_libs} ${_blas_rpaths}"
+ COMPILE_OPTIONS "${_cxx_additional_includes}"
+ )
+endif()
+
if( WITH_OMP_BACKEND )
addBackendWrapperGenOptions( "reference_omp"
COMPILE_DEFINITIONS "${REFERENCE_OMP_SELECTION_DEFS}"
@@ -146,6 +177,14 @@ if( WITH_NONBLOCKING_BACKEND )
)
endif()
+if( WITH_ALP_OMP_BACKEND )
+ addBackendWrapperGenOptions( "alp_omp"
+ COMPILE_DEFINITIONS "${ALP_OMP_SELECTION_DEFS}"
+ LINK_FLAGS "'${ALP_OMP_BACKEND_INSTALL_DIR}/lib${BACKEND_LIBRARY_OUTPUT_NAME}.a'"
+ "'${ALP_UTILS_INSTALL_DIR}/lib${ALP_UTILS_LIBRARY_OUTPUT_NAME}.a'" "${NUMA_LFLAG}"
+ )
+endif()
+
# distributed memory backends
if( WITH_BSP1D_BACKEND OR WITH_HYBRID_BACKEND )
assert_valid_variables( LPFRUN LPFCPP )
diff --git a/cmake/AddGRBVars.cmake b/cmake/AddGRBVars.cmake
index fab0f9ac9..7b5d12d9a 100644
--- a/cmake/AddGRBVars.cmake
+++ b/cmake/AddGRBVars.cmake
@@ -33,6 +33,10 @@ set( BSP1D_BACKEND_DEFAULT_NAME "backend_bsp1d" )
set( HYBRID_BACKEND_DEFAULT_NAME "backend_hybrid" )
set( HYPERDAGS_BACKEND_DEFAULT_NAME "backend_hyperdags" )
set( NONBLOCKING_BACKEND_DEFAULT_NAME "backend_nonblocking" )
+set( ALP_REFERENCE_BACKEND_DEFAULT_NAME "backend_alp_reference" )
+set( ALP_DISPATCH_BACKEND_DEFAULT_NAME "backend_alp_dispatch" )
+set( ALP_OMP_BACKEND_DEFAULT_NAME "backend_alp_omp" )
+
### COMPILER DEFINITIONS FOR HEADERS INCLUSION AND FOR BACKEND SELECTION
@@ -42,6 +46,9 @@ set( REFERENCE_OMP_INCLUDE_DEFS "_GRB_WITH_OMP" )
set( HYPERDAGS_INCLUDE_DEFS "_GRB_WITH_HYPERDAGS" )
set( NONBLOCKING_INCLUDE_DEFS "_GRB_WITH_NONBLOCKING" )
set( LPF_INCLUDE_DEFS "_GRB_WITH_LPF" )
+set( ALP_REFERENCE_INCLUDE_DEFS "_ALP_WITH_REFERENCE" )
+set( ALP_DISPATCH_INCLUDE_DEFS "_ALP_WITH_DISPATCH" )
+set( ALP_OMP_INCLUDE_DEFS "_ALP_WITH_OMP;_ALP_OMP_WITH_DISPATCH" )
# compiler definitions to select a backend
set( REFERENCE_SELECTION_DEFS "_GRB_BACKEND=reference" )
@@ -51,6 +58,12 @@ set( HYPERDAGS_SELECTION_DEFS
"_GRB_WITH_HYPERDAGS_USING=${WITH_HYPERDAGS_USING}"
)
set( NONBLOCKING_SELECTION_DEFS "_GRB_BACKEND=nonblocking" )
+set( ALP_REFERENCE_SELECTION_DEFS "_ALP_BACKEND=reference" )
+set( ALP_DISPATCH_SELECTION_DEFS "_ALP_BACKEND=dispatch" )
+set( ALP_OMP_SELECTION_DEFS
+ "_ALP_BACKEND=omp"
+ "_ALP_SECONDARY_BACKEND=dispatch"
+)
set( BSP1D_SELECTION_DEFS
"_GRB_BACKEND=BSP1D"
"_GRB_BSP1D_BACKEND=reference"
@@ -64,7 +77,7 @@ set( HYBRID_SELECTION_DEFS
set( NO_NUMA_DEF "_GRB_NO_LIBNUMA" )
### **ALL** BACKENDS, EVEN IF NOT ENABLED BY USER
-set( ALL_BACKENDS "reference" "reference_omp" "hyperdags" "nonblocking" "bsp1d" "hybrid" )
+set( ALL_BACKENDS "reference" "reference_omp" "hyperdags" "nonblocking" "bsp1d" "hybrid" "alp_reference" "alp_dispatch" "alp_omp" )
# list of user-enabled backends, for tests and wrapper scripts (do not change!)
set( AVAILABLE_BACKENDS "" )
@@ -90,6 +103,18 @@ if( WITH_NONBLOCKING_BACKEND )
list( APPEND AVAILABLE_BACKENDS "nonblocking" )
endif()
+if( WITH_ALP_REFERENCE_BACKEND )
+ list( APPEND AVAILABLE_BACKENDS "alp_reference" )
+endif()
+
+if( WITH_ALP_DISPATCH_BACKEND )
+ list( APPEND AVAILABLE_BACKENDS "alp_dispatch" )
+endif()
+
+if( WITH_ALP_OMP_BACKEND )
+ list( APPEND AVAILABLE_BACKENDS "alp_omp" )
+endif()
+
# distributed memory backends
if( WITH_BSP1D_BACKEND )
list( APPEND AVAILABLE_BACKENDS "bsp1d" )
@@ -99,5 +124,7 @@ if( WITH_HYBRID_BACKEND )
list( APPEND AVAILABLE_BACKENDS "hybrid" )
endif()
+message( STATUS "\n######### Configured with the following backends: #########\n${AVAILABLE_BACKENDS}\n" )
+
# add your own here!
diff --git a/cmake/Blas.cmake b/cmake/Blas.cmake
new file mode 100644
index 000000000..09ffae4a5
--- /dev/null
+++ b/cmake/Blas.cmake
@@ -0,0 +1,26 @@
+add_library(cblas INTERFACE)
+if(KBLAS_ROOT)
+ find_package(Kblas REQUIRED)
+ target_link_libraries(cblas INTERFACE Kblas::Kblas)
+ set(HEADER_NAME "kblas")
+else()
+ find_package(BLAS REQUIRED)
+ add_library( extBlas::extBlas UNKNOWN IMPORTED )
+ set_target_properties( extBlas::extBlas
+ PROPERTIES
+ IMPORTED_LOCATION "${BLAS_LIBRARIES}"
+ INTERFACE_LINK_OPTIONS "${BLAS_LINKER_FLAGS}"
+ #INTERFACE_INCLUDE_DIRECTORIES ${}
+ )
+
+ target_link_libraries(cblas INTERFACE extBlas::extBlas)
+ set(HEADER_NAME "cblas")
+endif()
+
+file(WRITE "${CMAKE_BINARY_DIR}/blas_wrapper/alp_blas.h" "#include \"${HEADER_NAME}.h\"\n" )
+#target_include_directories(cblas INTERFACE "${CMAKE_BINARY_DIR}/blas_wrapper" )
+
+target_include_directories( cblas INTERFACE
+ $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/blas_wrapper>
+ $<INSTALL_INTERFACE:blas_wrapper>
+)
diff --git a/cmake/FindKblas.cmake b/cmake/FindKblas.cmake
new file mode 100644
index 000000000..101b7df29
--- /dev/null
+++ b/cmake/FindKblas.cmake
@@ -0,0 +1,137 @@
+#
+# Copyright 2021 Huawei Technologies Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#[===================================================================[
+Find libkblas inside the standard system directories
+
+Read-only output variables:
+ KBLAS_FOUND
+ Indicates that the library has been found.
+
+ KBLAS_INCLUDE_DIR
+ Points to the libkblas include directory.
+
+ KBLAS_LIBRARY
+		Points to the libkblas that can be passed to target_link_libraries.
+
+creates a target Kblas::Kblas to link against libkblas
+#]===================================================================]
+
+# documentation of find_path() https://cmake.org/cmake/help/latest/command/find_path.html
+# documentation of find_library() https://cmake.org/cmake/help/latest/command/find_library.html
+
+if(NOT KBLAS_IMPL)
+ set(KBLAS_IMPL "locking")
+else()
+ if( NOT "${KBLAS_IMPL}" MATCHES "^(locking|nolocking|omp|pthread)$")
+ message( "KBLAS_IMPL = ${KBLAS_IMPL}")
+ # FATAL_ERROR (not the literal word ERROR) is required to actually stop configuration
+ message(FATAL_ERROR " wrong kblas implementation requested")
+ endif()
+endif()
+
+# find the root directory for libkblas
+find_path( KBLAS_ROOT_DIR
+ NAMES include/kblas.h # by checking where "include/kblas.h" exists
+ PATHS ${KBLAS_ROOT} # take as a hint the environment variable KBLAS_ROOT and
+ # add it to the default search paths
+ DOC "KBLAS root directory"
+)
+
+# look for the include directory
+# we should not assume the header is present, because some distributions have
+# different packages for binary-only versions (e.g., libkblas) and for
+# development-oriented versions (e.g., libkblas-dev); hence, look for the header
+# explicitly and raise an error if you cannot find it (otherwise targets will
+# surely not compile!)
+find_path( KBLAS_INCLUDE_DIR
+ NAMES kblas.h # by looking for this header file
+ HINTS ${KBLAS_ROOT_DIR} # start looking from KBLAS_ROOT_DIR, the most likely place
+ PATH_SUFFIXES include # when inspecting a path, look inside the include directory
+ DOC "KBLAS include directory"
+)
+
+# look for the binary library libkblas
+# do not give thorough hints here, because various Linux distributions may have different
+# conventions on shared binary directories (/lib, /usr/lib, /usr/lib64, ...)
+# and we don't want to "blind" CMake's search
+find_library( KBLAS_LIBRARY
+	NAMES kblas	# hence, CMake looks for libkblas.so, libkblas.so.<version>,
+			# libkblas.a and so on (read find_library() guide for more details)
+ HINTS "${KBLAS_ROOT_DIR}/lib/kblas/${KBLAS_IMPL}" # start looking from KBLAS_ROOT_DIR, the most likely place
+ DOC "KBLAS library"
+)
+set( BLAS_LIBRARIES ${KBLAS_LIBRARY} )
+
+find_library( GFORTRAN_LIBRARY
+	NAMES gfortran	# hence, CMake looks for libgfortran.so, libgfortran.so.<version>,
+			# libgfortran.a and so on (read find_library() guide for more details)
+ HINTS ${CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES}
+)
+
+# if the listed variables are set to existing paths, set the Kblas_FOUND variable
+# if not and the REQUIRED option was given when calling this find_module(),
+# raise an error (some components were not found and we need all of them)
+include( FindPackageHandleStandardArgs )
+find_package_handle_standard_args( Kblas
+ REQUIRED_VARS KBLAS_ROOT_DIR KBLAS_INCLUDE_DIR KBLAS_LIBRARY GFORTRAN_LIBRARY
+)
+
+# if we found the library, create a dedicated target with all needed information
+if( Kblas_FOUND )
+ # do not show these variables as cached ones
+ mark_as_advanced( KBLAS_ROOT_DIR KBLAS_INCLUDE_DIR KBLAS_LIBRARY BLAS_LIBRARIES )
+
+ # create an imported target, i.e. a target NOT built internally, as from
+ # https://cmake.org/cmake/help/latest/command/add_library.html#imported-libraries
+ # this way, depending targets may link against libkblas with target_link_libraries(),
+ # as if it was an internal target
+ # UNKNOWN tells CMake to inspect the library type (static or shared)
+ # e.g., if you compiled your own static libkblas and injected it via KBLAS_ROOT
+ # it will work out without changes
+ add_library ( gfortran::gfortran UNKNOWN IMPORTED )
+ add_library ( Kblas::Kblas UNKNOWN IMPORTED )
+	# set its properties to the appropriate locations, for both headers and binaries
+ set_target_properties( gfortran::gfortran
+ PROPERTIES
+ IMPORTED_LOCATION "${GFORTRAN_LIBRARY}"
+ )
+ set_target_properties( Kblas::Kblas
+ PROPERTIES
+ IMPORTED_LOCATION "${KBLAS_LIBRARY}"
+ INTERFACE_INCLUDE_DIRECTORIES ${KBLAS_INCLUDE_DIR}
+ )
+ if(NOT LibM_FOUND)
+ find_package(LibM REQUIRED)
+ endif()
+ target_link_libraries(Kblas::Kblas INTERFACE LibM::LibM gfortran::gfortran)
+ if("${KBLAS_IMPL}" STREQUAL "omp")
+ if(NOT OpenMP_FOUND)
+ find_package(OpenMP REQUIRED)
+ endif()
+ target_link_libraries(Kblas::Kblas INTERFACE OpenMP::OpenMP_C)
+ elseif("${KBLAS_IMPL}" STREQUAL "pthread")
+ if(NOT Threads_FOUND)
+ find_package(Threads REQUIRED)
+ endif()
+ if(NOT CMAKE_USE_PTHREADS_INIT)
+ # FATAL_ERROR (not the literal word ERROR) is required to actually stop configuration
+ message(FATAL_ERROR "pthread not found")
+ endif()
+ target_link_libraries(Kblas::Kblas INTERFACE Threads::Threads)
+ endif()
+endif()
+
+
diff --git a/cmake/KunpengBLAS.cmake b/cmake/KunpengBLAS.cmake
new file mode 100644
index 000000000..cb4ad4f6b
--- /dev/null
+++ b/cmake/KunpengBLAS.cmake
@@ -0,0 +1,67 @@
+#
+# Copyright 2022 Huawei Technologies Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#[===================================================================[
+Find libkml inside the standard system directories
+
+Read-only output variables:
+ KML_FOUND
+ Indicates that the library has been found.
+
+ KML_INCLUDE_DIR
+ Points to the libkml include directory.
+
+creates a target kml::kml to link against libkml
+#]===================================================================]
+
+# documentation of find_path() https://cmake.org/cmake/help/latest/command/find_path.html
+# documentation of find_library() https://cmake.org/cmake/help/latest/command/find_library.html
+
+# find the root directory for libkml
+find_path( KML_ROOT_DIR
+ NAMES lib/kml.h # by checking where "lib/kml.h" exists
+ HINTS ${KML_SOURCE} # start looking from KML_SOURCE, the most likely place
+)
+
+# if the listed variables are set to existing paths, set the kml_FOUND variable
+# if not and the REQUIRED option was given when calling this find_module(),
+# raise an error (some components were not found and we need all of them)
+include( FindPackageHandleStandardArgs )
+find_package_handle_standard_args( KML
+ REQUIRED_VARS KML_ROOT_DIR
+)
+
+# if we found the library, create a dedicated target with all needed information
+if( KML_FOUND )
+ # do not show these variables as cached ones
+ mark_as_advanced( KML_ROOT_DIR )
+
+ # create an imported target, i.e. a target NOT built internally, as from
+ # https://cmake.org/cmake/help/latest/command/add_library.html#imported-libraries
+ # this way, depending targets may link against libkml with target_link_libraries(),
+ # as if it was an internal target
+ # UNKNOWN tells CMake to inspect the library type (static or shared)
+ # e.g., if you compiled your own static libkml and injected it via KML_ROOT
+ # it will work out without changes
+ add_library ( kml INTERFACE )
+	# set its properties to the appropriate locations, for both headers and binaries
+ # set_target_properties( kml::kml
+ # PROPERTIES
+ # INTERFACE_INCLUDE_DIRECTORIES "${KML_ROOT_DIR}"
+ # )
+ target_include_directories ( kml INTERFACE ${KML_ROOT_DIR}
+ )
+endif()
diff --git a/docs/alp-public.conf b/docs/alp-public.conf
new file mode 100644
index 000000000..937501d52
--- /dev/null
+++ b/docs/alp-public.conf
@@ -0,0 +1,2453 @@
+# Doxyfile 1.8.14
+
+#
+# Copyright 2021 Huawei Technologies Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project.
+#
+# All text after a double hash (##) is considered a comment and is placed in
+# front of the TAG it is preceding.
+#
+# All text after a single hash (#) is considered a comment and will be ignored.
+# The format is:
+# TAG = value [value, ...]
+# For lists, items can also be appended using:
+# TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (\" \").
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all text
+# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
+# built into libc) for the transcoding. See
+# https://www.gnu.org/software/libiconv/ for the list of possible encodings.
+# The default value is: UTF-8.
+
+DOXYFILE_ENCODING = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
+# double-quotes, unless you are using Doxywizard) that should identify the
+# project for which the documentation is generated. This name is used in the
+# title of most generated pages and in a few other places.
+# The default value is: My Project.
+
+PROJECT_NAME = "ALP Public API"
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
+# could be handy for archiving the generated documentation or if some version
+# control system is used.
+
+PROJECT_NUMBER = alpha
+
+# Using the PROJECT_BRIEF tag one can provide an optional one line description
+# for a project that appears at the top of each page and should give viewer a
+# quick idea about the purpose of the project. Keep the description short.
+
+PROJECT_BRIEF =
+
+# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
+# in the documentation. The maximum height of the logo should not exceed 55
+# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy
+# the logo to the output directory.
+
+PROJECT_LOGO =
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
+# into which the generated documentation will be written. If a relative path is
+# entered, it will be relative to the location where doxygen was started. If
+# left blank the current directory will be used.
+
+OUTPUT_DIRECTORY = docs/alp-public
+
+# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-
+# directories (in 2 levels) under the output directory of each output format and
+# will distribute the generated files over these directories. Enabling this
+# option can be useful when feeding doxygen a huge amount of source files, where
+# putting all generated files in the same directory would otherwise cause
+# performance problems for the file system.
+# The default value is: NO.
+
+CREATE_SUBDIRS = NO
+
+# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII
+# characters to appear in the names of generated files. If set to NO, non-ASCII
+# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode
+# U+3044.
+# The default value is: NO.
+
+ALLOW_UNICODE_NAMES = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
+# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
+# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
+# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
+# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
+# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
+# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
+# Ukrainian and Vietnamese.
+# The default value is: English.
+
+OUTPUT_LANGUAGE = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
+# descriptions after the members that are listed in the file and class
+# documentation (similar to Javadoc). Set to NO to disable this.
+# The default value is: YES.
+
+BRIEF_MEMBER_DESC = YES
+
+# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief
+# description of a member or function before the detailed description
+#
+# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+# The default value is: YES.
+
+REPEAT_BRIEF = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator that is
+# used to form the text in various listings. Each string in this list, if found
+# as the leading text of the brief description, will be stripped from the text
+# and the result, after processing the whole list, is used as the annotated
+# text. Otherwise, the brief description is used as-is. If left blank, the
+# following values are used ($name is automatically replaced with the name of
+# the entity):The $name class, The $name widget, The $name file, is, provides,
+# specifies, contains, represents, a, an and the.
+
+ABBREVIATE_BRIEF =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# doxygen will generate a detailed section even if there is only a brief
+# description.
+# The default value is: NO.
+
+ALWAYS_DETAILED_SEC = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+# The default value is: NO.
+
+INLINE_INHERITED_MEMB = NO
+
+# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path
+# before files name in the file list and in the header files. If set to NO the
+# shortest path that makes the file name unique will be used
+# The default value is: YES.
+
+FULL_PATH_NAMES = YES
+
+# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
+# Stripping is only done if one of the specified strings matches the left-hand
+# part of the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the path to
+# strip.
+#
+# Note that you can specify absolute paths here, but also relative paths, which
+# will be relative from the directory where doxygen is started.
+# This tag requires that the tag FULL_PATH_NAMES is set to YES.
+
+STRIP_FROM_PATH =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
+# path mentioned in the documentation of a class, which tells the reader which
+# header file to include in order to use a class. If left blank only the name of
+# the header file containing the class definition is used. Otherwise one should
+# specify the list of include paths that are normally passed to the compiler
+# using the -I flag.
+
+STRIP_FROM_INC_PATH =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
+# less readable) file names. This can be useful if your file system doesn't
+# support long names like on DOS, Mac, or CD-ROM.
+# The default value is: NO.
+
+SHORT_NAMES = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
+# first line (until the first dot) of a Javadoc-style comment as the brief
+# description. If set to NO, the Javadoc-style will behave just like regular Qt-
+# style comments (thus requiring an explicit @brief command for a brief
+# description.)
+# The default value is: NO.
+
+JAVADOC_AUTOBRIEF = YES
+
+# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
+# line (until the first dot) of a Qt-style comment as the brief description. If
+# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
+# requiring an explicit \brief command for a brief description.)
+# The default value is: NO.
+
+QT_AUTOBRIEF = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
+# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
+# a brief description. This used to be the default behavior. The new default is
+# to treat a multi-line C++ comment block as a detailed description. Set this
+# tag to YES if you prefer the old behavior instead.
+#
+# Note that setting this tag to YES also means that rational rose comments are
+# not recognized any more.
+# The default value is: NO.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
+# documentation from any documented member that it re-implements.
+# The default value is: YES.
+
+INHERIT_DOCS = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new
+# page for each member. If set to NO, the documentation of a member will be part
+# of the file/class/namespace that contains it.
+# The default value is: NO.
+
+SEPARATE_MEMBER_PAGES = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
+# uses this value to replace tabs by spaces in code fragments.
+# Minimum value: 1, maximum value: 16, default value: 4.
+
+TAB_SIZE = 4
+
+# This tag can be used to specify a number of aliases that act as commands in
+# the documentation. An alias has the form:
+# name=value
+# For example adding
+# "sideeffect=@par Side Effects:\n"
+# will allow you to put the command \sideeffect (or @sideeffect) in the
+# documentation, which will result in a user-defined paragraph with heading
+# "Side Effects:". You can put \n's in the value part of an alias to insert
+# newlines (in the resulting output). You can put ^^ in the value part of an
+# alias to insert a newline as if a physical newline was in the original file.
+
+ALIASES =
+
+# This tag can be used to specify a number of word-keyword mappings (TCL only).
+# A mapping has the form "name=value". For example adding "class=itcl::class"
+# will allow you to use the command class in the itcl::class meaning.
+
+TCL_SUBST =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
+# only. Doxygen will then generate output that is more tailored for C. For
+# instance, some of the names that are used will be different. The list of all
+# members will be omitted, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_FOR_C = NO
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
+# Python sources only. Doxygen will then generate output that is more tailored
+# for that language. For instance, namespaces will be presented as packages,
+# qualified scopes will look different, etc.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_JAVA = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources. Doxygen will then generate output that is tailored for Fortran.
+# The default value is: NO.
+
+OPTIMIZE_FOR_FORTRAN = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for VHDL.
+# The default value is: NO.
+
+OPTIMIZE_OUTPUT_VHDL = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it
+# parses. With this tag you can assign which parser to use for a given
+# extension. Doxygen has a built-in mapping, but you can override or extend it
+# using this tag. The format is ext=language, where ext is a file extension, and
+# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
+# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran:
+# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran:
+# Fortran. In the latter case the parser tries to guess whether the code is fixed
+# or free formatted code, this is the default for Fortran type files), VHDL. For
+# instance to make doxygen treat .inc files as Fortran files (default is PHP),
+# and .f files as C (default is Fortran), use: inc=Fortran f=C.
+#
+# Note: For files without extension you can use no_extension as a placeholder.
+#
+# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
+# the files are not read by doxygen.
+
+EXTENSION_MAPPING =
+
+# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
+# according to the Markdown format, which allows for more readable
+# documentation. See http://daringfireball.net/projects/markdown/ for details.
+# The output of markdown processing is further processed by doxygen, so you can
+# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
+# case of backward compatibilities issues.
+# The default value is: YES.
+
+MARKDOWN_SUPPORT = YES
+
+# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up
+# to that level are automatically included in the table of contents, even if
+# they do not have an id attribute.
+# Note: This feature currently applies only to Markdown headings.
+# Minimum value: 0, maximum value: 99, default value: 0.
+# This tag requires that the tag MARKDOWN_SUPPORT is set to YES.
+
+TOC_INCLUDE_HEADINGS = 0
+
+# When enabled doxygen tries to link words that correspond to documented
+# classes, or namespaces to their corresponding documentation. Such a link can
+# be prevented in individual cases by putting a % sign in front of the word or
+# globally by setting AUTOLINK_SUPPORT to NO.
+# The default value is: YES.
+
+AUTOLINK_SUPPORT = YES
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should set this
+# tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string);
+# versus func(std::string) {}). This also make the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+# The default value is: NO.
+
+BUILTIN_STL_SUPPORT = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+# The default value is: NO.
+
+CPP_CLI_SUPPORT = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
+# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen
+# will parse them like normal C++ but will assume all classes use public instead
+# of private inheritance when no explicit protection keyword is present.
+# The default value is: NO.
+
+SIP_SUPPORT = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate
+# getter and setter methods for a property. Setting this option to YES will make
+# doxygen to replace the get and set methods by a property in the documentation.
+# This will only work if the methods are indeed getting or setting a simple
+# type. If this is not the case, or you want to show the methods anyway, you
+# should set this option to NO.
+# The default value is: YES.
+
+IDL_PROPERTY_SUPPORT = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+# The default value is: NO.
+
+DISTRIBUTE_GROUP_DOC = NO
+
+# If one adds a struct or class to a group and this option is enabled, then also
+# any nested class or struct is added to the same group. By default this option
+# is disabled and one has to add nested compounds explicitly via \ingroup.
+# The default value is: NO.
+
+GROUP_NESTED_COMPOUNDS = NO
+
+# Set the SUBGROUPING tag to YES to allow class member groups of the same type
+# (for instance a group of public functions) to be put as a subgroup of that
+# type (e.g. under the Public Functions section). Set it to NO to prevent
+# subgrouping. Alternatively, this can be done per class using the
+# \nosubgrouping command.
+# The default value is: YES.
+
+SUBGROUPING = YES
+
+# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
+# are shown inside the group in which they are included (e.g. using \ingroup)
+# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
+# and RTF).
+#
+# Note that this feature does not work in combination with
+# SEPARATE_MEMBER_PAGES.
+# The default value is: NO.
+
+INLINE_GROUPED_CLASSES = NO
+
+# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
+# with only public data fields or simple typedef fields will be shown inline in
+# the documentation of the scope in which they are defined (i.e. file,
+# namespace, or group documentation), provided this scope is documented. If set
+# to NO, structs, classes, and unions are shown on a separate page (for HTML and
+# Man pages) or section (for LaTeX and RTF).
+# The default value is: NO.
+
+INLINE_SIMPLE_STRUCTS = NO
+
+# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
+# enum is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically be
+# useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+# The default value is: NO.
+
+TYPEDEF_HIDES_STRUCT = NO
+
+# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
+# cache is used to resolve symbols given their name and scope. Since this can be
+# an expensive process and often the same symbol appears multiple times in the
+# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
+# doxygen will become slower. If the cache is too large, memory is wasted. The
+# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
+# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
+# symbols. At the end of a run doxygen will report the cache usage and suggest
+# the optimal cache size from a speed point of view.
+# Minimum value: 0, maximum value: 9, default value: 0.
+
+LOOKUP_CACHE_SIZE = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
+# documentation are documented, even if no documentation was available. Private
+# class members and static file members will be hidden unless the
+# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
+# Note: This will also disable the warnings about undocumented members that are
+# normally produced when WARNINGS is set to YES.
+# The default value is: NO.
+
+EXTRACT_ALL = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
+# be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PRIVATE = NO
+
+# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
+# scope will be included in the documentation.
+# The default value is: NO.
+
+EXTRACT_PACKAGE = NO
+
+# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
+# included in the documentation.
+# The default value is: NO.
+
+EXTRACT_STATIC = YES
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
+# locally in source files will be included in the documentation. If set to NO,
+# only classes defined in header files are included. Does not have any effect
+# for Java sources.
+# The default value is: YES.
+
+EXTRACT_LOCAL_CLASSES = NO
+
+# This flag is only useful for Objective-C code. If set to YES, local methods,
+# which are defined in the implementation section but not in the interface are
+# included in the documentation. If set to NO, only methods in the interface are
+# included.
+# The default value is: NO.
+
+EXTRACT_LOCAL_METHODS = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base name of
+# the file that contains the anonymous namespace. By default anonymous namespace
+# are hidden.
+# The default value is: NO.
+
+EXTRACT_ANON_NSPACES = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
+# undocumented members inside documented classes or files. If set to NO these
+# members will be included in the various overviews, but no documentation
+# section is generated. This option has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_MEMBERS = YES
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy. If set
+# to NO, these classes will be included in the various overviews. This option
+# has no effect if EXTRACT_ALL is enabled.
+# The default value is: NO.
+
+HIDE_UNDOC_CLASSES = YES
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
+# (class|struct|union) declarations. If set to NO, these declarations will be
+# included in the documentation.
+# The default value is: NO.
+
+HIDE_FRIEND_COMPOUNDS = YES
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
+# documentation blocks found inside the body of a function. If set to NO, these
+# blocks will be appended to the function's detailed documentation block.
+# The default value is: NO.
+
+HIDE_IN_BODY_DOCS = NO
+
+# The INTERNAL_DOCS tag determines if documentation that is typed after a
+# \internal command is included. If the tag is set to NO then the documentation
+# will be excluded. Set it to YES to include the internal documentation.
+# The default value is: NO.
+
+INTERNAL_DOCS = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
+# names in lower-case letters. If set to YES, upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+# The default value is: system dependent.
+
+CASE_SENSE_NAMES = YES
+
+# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
+# their full class and namespace scopes in the documentation. If set to YES, the
+# scope will be hidden.
+# The default value is: NO.
+
+HIDE_SCOPE_NAMES = YES
+
+# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
+# append additional text to a page's title, such as Class Reference. If set to
+# YES the compound reference will be hidden.
+# The default value is: NO.
+
+HIDE_COMPOUND_REFERENCE= NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
+# the files that are included by a file in the documentation of that file.
+# The default value is: YES.
+
+SHOW_INCLUDE_FILES = NO
+
+# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
+# grouped member an include statement to the documentation, telling the reader
+# which file to include in order to use the member.
+# The default value is: NO.
+
+SHOW_GROUPED_MEMB_INC = NO
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
+# files with double quotes in the documentation rather than with sharp brackets.
+# The default value is: NO.
+
+FORCE_LOCAL_INCLUDES = NO
+
+# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
+# documentation for inline members.
+# The default value is: YES.
+
+INLINE_INFO = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
+# (detailed) documentation of file and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order.
+# The default value is: YES.
+
+SORT_MEMBER_DOCS = YES
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
+# descriptions of file, namespace and class members alphabetically by member
+# name. If set to NO, the members will appear in declaration order. Note that
+# this will also influence the order of the classes in the class list.
+# The default value is: NO.
+
+SORT_BRIEF_DOCS = YES
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
+# (brief and detailed) documentation of class members so that constructors and
+# destructors are listed first. If set to NO the constructors will appear in the
+# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
+# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
+# member documentation.
+# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
+# detailed member documentation.
+# The default value is: NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
+# of group names into alphabetical order. If set to NO the group names will
+# appear in their defined order.
+# The default value is: NO.
+
+SORT_GROUP_NAMES = YES
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
+# fully-qualified names, including namespaces. If set to NO, the class list will
+# be sorted only by class name, not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the alphabetical
+# list.
+# The default value is: NO.
+
+SORT_BY_SCOPE_NAME = NO
+
+# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
+# type resolution of all parameters of a function it will reject a match between
+# the prototype and the implementation of a member function even if there is
+# only one candidate or it is obvious which candidate to choose by doing a
+# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
+# accept a match between prototype and implementation in such cases.
+# The default value is: NO.
+
+STRICT_PROTO_MATCHING = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
+# list. This list is created by putting \todo commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TODOLIST = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
+# list. This list is created by putting \test commands in the documentation.
+# The default value is: YES.
+
+GENERATE_TESTLIST = NO
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
+# list. This list is created by putting \bug commands in the documentation.
+# The default value is: YES.
+
+GENERATE_BUGLIST = NO
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
+# the deprecated list. This list is created by putting \deprecated commands in
+# the documentation.
+# The default value is: YES.
+
+GENERATE_DEPRECATEDLIST= NO
+
+# The ENABLED_SECTIONS tag can be used to enable conditional documentation
+# sections, marked by \if ... \endif and \cond
+# ... \endcond blocks.
+
+ENABLED_SECTIONS =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the
+# initial value of a variable or macro / define can have for it to appear in the
+# documentation. If the initializer consists of more lines than specified here
+# it will be hidden. Use a value of 0 to hide initializers completely. The
+# appearance of the value of individual variables and macros / defines can be
+# controlled using \showinitializer or \hideinitializer command in the
+# documentation regardless of this setting.
+# Minimum value: 0, maximum value: 10000, default value: 30.
+
+MAX_INITIALIZER_LINES = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at
+# the bottom of the documentation of classes and structs. If set to YES, the
+# list will mention the files that were used to generate the documentation.
+# The default value is: YES.
+
+SHOW_USED_FILES = NO
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This
+# will remove the Files entry from the Quick Index and from the Folder Tree View
+# (if specified).
+# The default value is: YES.
+
+SHOW_FILES = NO
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces
+# page. This will remove the Namespaces entry from the Quick Index and from the
+# Folder Tree View (if specified).
+# The default value is: YES.
+
+SHOW_NAMESPACES = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command command input-file, where command is the value of the
+# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided
+# by doxygen. Whatever the program writes to standard output is used as the file
+# version. For an example see the documentation.
+
+FILE_VERSION_FILTER =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed
+# by doxygen. The layout file controls the global structure of the generated
+# output files in an output format independent way. To create the layout file
+# that represents doxygen's defaults, run doxygen with the -l option. You can
+# optionally specify a file name after the option, if omitted DoxygenLayout.xml
+# will be used as the name of the layout file.
+#
+# Note that if you run doxygen from a directory containing a file called
+# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE
+# tag is left empty.
+
+LAYOUT_FILE =
+
+# The CITE_BIB_FILES tag can be used to specify one or more bib files containing
+# the reference definitions. This must be a list of .bib files. The .bib
+# extension is automatically appended if omitted. This requires the bibtex tool
+# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info.
+# For LaTeX the style of the bibliography can be controlled using
+# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the
+# search path. See also \cite for info how to create references.
+
+CITE_BIB_FILES =
+
+#---------------------------------------------------------------------------
+# Configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated to
+# standard output by doxygen. If QUIET is set to YES this implies that the
+# messages are off.
+# The default value is: NO.
+
+QUIET = NO
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES
+# this implies that the warnings are on.
+#
+# Tip: Turn warnings on while writing the documentation.
+# The default value is: YES.
+
+WARNINGS = YES
+
+# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate
+# warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag
+# will automatically be disabled.
+# The default value is: YES.
+
+WARN_IF_UNDOCUMENTED = YES
+
+# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some parameters
+# in a documented function, or documenting parameters that don't exist or using
+# markup commands wrongly.
+# The default value is: YES.
+
+WARN_IF_DOC_ERROR = YES
+
+# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that
+# are documented, but have no documentation for their parameters or return
+# value. If set to NO, doxygen will only warn about wrong or incomplete
+# parameter documentation, but not about the absence of documentation.
+# The default value is: NO.
+
+WARN_NO_PARAMDOC = NO
+
+# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when
+# a warning is encountered.
+# The default value is: NO.
+
+WARN_AS_ERROR = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that doxygen
+# can produce. The string should contain the $file, $line, and $text tags, which
+# will be replaced by the file and line number from which the warning originated
+# and the warning text. Optionally the format may contain $version, which will
+# be replaced by the version of the file (if it could be obtained via
+# FILE_VERSION_FILTER)
+# The default value is: $file:$line: $text.
+
+WARN_FORMAT = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning and error
+# messages should be written. If left blank the output is written to standard
+# error (stderr).
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag is used to specify the files and/or directories that contain
+# documented source files. You may enter file names like myfile.cpp or
+# directories like /usr/src/myproject. Separate the files or directories with
+# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
+# Note: If this tag is empty the current directory is searched.
+
+INPUT = include/alp.hpp \
+ include/alp/
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
+# libiconv (or the iconv built into libc) for the transcoding. See the libiconv
+# documentation (see: https://www.gnu.org/software/libiconv/) for the list of
+# possible encodings.
+# The default value is: UTF-8.
+
+INPUT_ENCODING = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and
+# *.h) to filter out the source-files in the directories.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# read by doxygen.
+#
+# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp,
+# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h,
+# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc,
+# *.m, *.markdown, *.md, *.mm, *.dox, *.py, *.pyw, *.f90, *.f95, *.f03, *.f08,
+# *.f, *.for, *.tcl, *.vhd, *.vhdl, *.ucf and *.qsf.
+
+FILE_PATTERNS = *.hpp \
+ *.cpp \
+ *.h \
+ *.c
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories should
+# be searched for input files as well.
+# The default value is: NO.
+
+RECURSIVE = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should be
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+#
+# Note that relative paths are relative to the directory from which doxygen is
+# run.
+
+EXCLUDE = include/alp/base
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix file system feature) are excluded
+# from the input.
+# The default value is: NO.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories.
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories for example use the pattern */test/*
+
+EXCLUDE_PATTERNS =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+#
+# Note that the wildcards are matched against the file with absolute path, so to
+# exclude all test directories use the pattern */test/*
+
+EXCLUDE_SYMBOLS = internal \
+ IMF \
+ imf \
+ AutoDeleter \
+ SizeOf
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or directories
+# that contain example code fragments that are included (see the \include
+# command).
+
+EXAMPLE_PATH =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and
+# *.h) to filter out the source-files in the directories. If left blank all
+# files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude commands
+# irrespective of the value of the RECURSIVE tag.
+# The default value is: NO.
+
+EXAMPLE_RECURSIVE = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or directories
+# that contain images that are to be included in the documentation (see the
+# \image command).
+
+IMAGE_PATH =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command:
+#
+# <filter> <input-file>
+#
+# where <filter> is the value of the INPUT_FILTER tag, and <input-file> is the
+# name of an input file. Doxygen will then use the output that the filter
+# program writes to standard output. If FILTER_PATTERNS is specified, this tag
+# will be ignored.
+#
+# Note that the filter must not add or remove lines; it is applied before the
+# code is scanned, but not when the output code is generated. If lines are added
+# or removed, the anchors will not be placed correctly.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form: pattern=filter
+# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how
+# filters are used. If the FILTER_PATTERNS tag is empty or if none of the
+# patterns match the file name, INPUT_FILTER is applied.
+#
+# Note that for custom extensions or not directly supported extensions you also
+# need to set EXTENSION_MAPPING for the extension otherwise the files are not
+# properly processed by doxygen.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will also be used to filter the input files that are used for
+# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES).
+# The default value is: NO.
+
+FILTER_SOURCE_FILES = NO
+
+# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file
+# pattern. A pattern will override the setting for FILTER_PATTERN (if any) and
+# it is also possible to disable source filtering for a specific pattern using
+# *.ext= (so without naming a filter).
+# This tag requires that the tag FILTER_SOURCE_FILES is set to YES.
+
+FILTER_SOURCE_PATTERNS =
+
+# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that
+# is part of the input, its contents will be placed on the main page
+# (index.html). This can be useful if you have a project on for instance GitHub
+# and want to reuse the introduction page also for the doxygen output.
+
+USE_MDFILE_AS_MAINPAGE =
+
+#---------------------------------------------------------------------------
+# Configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will be
+# generated. Documented entities will be cross-referenced with these sources.
+#
+# Note: To get rid of all source code in the generated output, make sure that
+# also VERBATIM_HEADERS is set to NO.
+# The default value is: NO.
+
+SOURCE_BROWSER = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body of functions,
+# classes and enums directly into the documentation.
+# The default value is: NO.
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any
+# special comment blocks from generated source code fragments. Normal C, C++ and
+# Fortran comments will always remain visible.
+# The default value is: YES.
+
+STRIP_CODE_COMMENTS = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES then for each documented
+# function all documented functions referencing it will be listed.
+# The default value is: NO.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES then for each documented function
+# all documented entities called/used by that function will be listed.
+# The default value is: NO.
+
+REFERENCES_RELATION = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set
+# to YES then the hyperlinks from functions in REFERENCES_RELATION and
+# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will
+# link to the documentation.
+# The default value is: YES.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the
+# source code will show a tooltip with additional information such as prototype,
+# brief description and links to the definition and documentation. Since this
+# will make the HTML file larger and loading of large files a bit slower, you
+# can opt to disable this feature.
+# The default value is: YES.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+SOURCE_TOOLTIPS = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code will
+# point to the HTML generated by the htags(1) tool instead of doxygen built-in
+# source browser. The htags tool is part of GNU's global source tagging system
+# (see https://www.gnu.org/software/global/global.html). You will need version
+# 4.8.6 or higher.
+#
+# To use it do the following:
+# - Install the latest version of global
+# - Enable SOURCE_BROWSER and USE_HTAGS in the config file
+# - Make sure the INPUT points to the root of the source tree
+# - Run doxygen as normal
+#
+# Doxygen will invoke htags (and that will in turn invoke gtags), so these
+# tools must be available from the command line (i.e. in the search path).
+#
+# The result: instead of the source browser generated by doxygen, the links to
+# source code will now point to the output of htags.
+# The default value is: NO.
+# This tag requires that the tag SOURCE_BROWSER is set to YES.
+
+USE_HTAGS = NO
+
+# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a
+# verbatim copy of the header file for each class for which an include is
+# specified. Set to NO to disable this.
+# See also: Section \class.
+# The default value is: YES.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all
+# compounds will be generated. Enable this if the project contains a lot of
+# classes, structs, unions or interfaces.
+# The default value is: YES.
+
+ALPHABETICAL_INDEX = YES
+
+# The COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns in
+# which the alphabetical index list will be split.
+# Minimum value: 1, maximum value: 20, default value: 5.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+COLS_IN_ALPHA_INDEX = 5
+
+# In case all classes in a project start with a common prefix, all classes will
+# be put under the same header in the alphabetical index. The IGNORE_PREFIX tag
+# can be used to specify a prefix (or a list of prefixes) that should be ignored
+# while generating the index headers.
+# This tag requires that the tag ALPHABETICAL_INDEX is set to YES.
+
+IGNORE_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output
+# The default value is: YES.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each
+# generated HTML page (for example: .htm, .php, .asp).
+# The default value is: .html.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a user-defined HTML header file for
+# each generated HTML page. If the tag is left blank doxygen will generate a
+# standard header.
+#
+# To get valid HTML the header file that includes any scripts and style sheets
+# that doxygen needs, which is dependent on the configuration options used (e.g.
+# the setting GENERATE_TREEVIEW). It is highly recommended to start with a
+# default header using
+# doxygen -w html new_header.html new_footer.html new_stylesheet.css
+# YourConfigFile
+# and then modify the file new_header.html. See also section "Doxygen usage"
+# for information on how to generate the default header that doxygen normally
+# uses.
+# Note: The header is subject to change so you typically have to regenerate the
+# default header when upgrading to a newer version of doxygen. For a description
+# of the possible markers and block names see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_HEADER =
+
+# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
+# generated HTML page. If the tag is left blank doxygen will generate a standard
+# footer. See HTML_HEADER for more information on how to generate a default
+# footer and what special commands can be used inside the footer. See also
+# section "Doxygen usage" for information on how to generate the default footer
+# that doxygen normally uses.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_FOOTER =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
+# sheet that is used by each HTML page. It can be used to fine-tune the look of
+# the HTML output. If left blank doxygen will generate a default style sheet.
+# See also section "Doxygen usage" for information on how to generate the style
+# sheet that doxygen normally uses.
+# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as
+# it is more robust and this tag (HTML_STYLESHEET) will in the future become
+# obsolete.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_STYLESHEET =
+
+# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# cascading style sheets that are included after the standard style sheets
+# created by doxygen. Using this option one can overrule certain style aspects.
+# This is preferred over using HTML_STYLESHEET since it does not replace the
+# standard style sheet and is therefore more robust against future updates.
+# Doxygen will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list). For an example see the documentation.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_STYLESHEET =
+
+# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the HTML output directory. Note
+# that these files will be copied to the base HTML output directory. Use the
+# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these
+# files. In the HTML_STYLESHEET file, use the file name only. Also note that the
+# files will be copied as-is; there are no commands or markers available.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_EXTRA_FILES =
+
+# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen
+# will adjust the colors in the style sheet and background images according to
+# this color. Hue is specified as an angle on a colorwheel, see
+# https://en.wikipedia.org/wiki/Hue for more information. For instance the value
+# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300
+# purple, and 360 is red again.
+# Minimum value: 0, maximum value: 359, default value: 220.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_HUE = 220
+
+# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors
+# in the HTML output. For a value of 0 the output will use grayscales only. A
+# value of 255 will produce the most vivid colors.
+# Minimum value: 0, maximum value: 255, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_SAT = 100
+
+# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the
+# luminance component of the colors in the HTML output. Values below 100
+# gradually make the output lighter, whereas values above 100 make the output
+# darker. The value divided by 100 is the actual gamma applied, so 80 represents
+# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not
+# change the gamma.
+# Minimum value: 40, maximum value: 240, default value: 80.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_COLORSTYLE_GAMMA = 80
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting this
+# to YES can help to show when doxygen was last run and thus if the
+# documentation is up to date.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_TIMESTAMP = YES
+
+# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML
+# documentation will contain a main index with vertical navigation menus that
+# are dynamically created via Javascript. If disabled, the navigation index will
+# consists of multiple levels of tabs that are statically embedded in every HTML
+# page. Disable this option to support browsers that do not have Javascript,
+# like the Qt help browser.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_MENUS = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_DYNAMIC_SECTIONS = NO
+
+# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries
+# shown in the various tree structured indices initially; the user can expand
+# and collapse entries dynamically later on. Doxygen will expand the tree to
+# such a level that at most the specified number of entries are visible (unless
+# a fully collapsed tree already exceeds this amount). So setting the number of
+# entries 1 will produce a full collapsed tree by default. 0 is a special value
+# representing an infinite number of entries and will result in a full expanded
+# tree by default.
+# Minimum value: 0, maximum value: 9999, default value: 100.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+HTML_INDEX_NUM_ENTRIES = 100
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files will be
+# generated that can be used as input for Apple's Xcode 3 integrated development
+# environment (see: https://developer.apple.com/tools/xcode/), introduced with
+# OSX 10.5 (Leopard). To create a documentation set, doxygen will generate a
+# Makefile in the HTML output directory. Running make will produce the docset in
+# that directory and running make install will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at
+# startup. See https://developer.apple.com/tools/creatingdocsetswithdoxygen.html
+# for more information.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_DOCSET = NO
+
+# This tag determines the name of the docset feed. A documentation feed provides
+# an umbrella under which multiple documentation sets from a single provider
+# (such as a company or product suite) can be grouped.
+# The default value is: Doxygen generated docs.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_FEEDNAME = "Doxygen generated docs"
+
+# This tag specifies a string that should uniquely identify the documentation
+# set bundle. This should be a reverse domain-name style string, e.g.
+# com.mycompany.MyDocSet. Doxygen will append .docset to the name.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_BUNDLE_ID = org.doxygen.Project
+
+# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify
+# the documentation publisher. This should be a reverse domain-name style
+# string, e.g. com.mycompany.MyDocSet.documentation.
+# The default value is: org.doxygen.Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_ID = org.doxygen.Publisher
+
+# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher.
+# The default value is: Publisher.
+# This tag requires that the tag GENERATE_DOCSET is set to YES.
+
+DOCSET_PUBLISHER_NAME = Publisher
+
+# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three
+# additional HTML index files: index.hhp, index.hhc, and index.hhk. The
+# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop
+# (see: http://www.microsoft.com/en-us/download/details.aspx?id=21138) on
+# Windows.
+#
+# The HTML Help Workshop contains a compiler that can convert all HTML output
+# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML
+# files are now used as the Windows 98 help format, and will replace the old
+# Windows help format (.hlp) on all Windows platforms in the future. Compressed
+# HTML files also contain an index, a table of contents, and you can search for
+# words in the documentation. The HTML workshop also contains a viewer for
+# compressed HTML files.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_HTMLHELP = NO
+
+# The CHM_FILE tag can be used to specify the file name of the resulting .chm
+# file. You can add a path in front of the file if the result should not be
+# written to the html output directory.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_FILE =
+
+# The HHC_LOCATION tag can be used to specify the location (absolute path
+# including file name) of the HTML help compiler (hhc.exe). If non-empty,
+# doxygen will try to run the HTML help compiler on the generated index.hhp.
+# The file has to be specified with full path.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+HHC_LOCATION =
+
+# The GENERATE_CHI flag controls if a separate .chi index file is generated
+# (YES) or that it should be included in the master .chm file (NO).
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+GENERATE_CHI = NO
+
+# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc)
+# and project file content.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+CHM_INDEX_ENCODING =
+
+# The BINARY_TOC flag controls whether a binary table of contents is generated
+# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it
+# enables the Previous and Next buttons.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members to
+# the table of contents of the HTML help documentation and to the tree view.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTMLHELP is set to YES.
+
+TOC_EXPAND = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and
+# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that
+# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help
+# (.qch) of the generated HTML documentation.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_QHP = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify
+# the file name of the resulting .qch file. The path specified is relative to
+# the HTML output folder.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QCH_FILE =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help
+# Project output. For more information please see Qt Help Project / Namespace
+# (see: http://doc.qt.io/qt-4.8/qthelpproject.html#namespace).
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_NAMESPACE = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt
+# Help Project output. For more information please see Qt Help Project / Virtual
+# Folders (see: http://doc.qt.io/qt-4.8/qthelpproject.html#virtual-folders).
+# The default value is: doc.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_VIRTUAL_FOLDER = doc
+
+# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom
+# filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: http://doc.qt.io/qt-4.8/qthelpproject.html#custom-filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_NAME =
+
+# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the
+# custom filter to add. For more information please see Qt Help Project / Custom
+# Filters (see: http://doc.qt.io/qt-4.8/qthelpproject.html#custom-filters).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_CUST_FILTER_ATTRS =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this
+# project's filter section matches. Qt Help Project / Filter Attributes (see:
+# http://doc.qt.io/qt-4.8/qthelpproject.html#filter-attributes).
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHP_SECT_FILTER_ATTRS =
+
+# The QHG_LOCATION tag can be used to specify the location of Qt's
+# qhelpgenerator. If non-empty doxygen will try to run qhelpgenerator on the
+# generated .qhp file.
+# This tag requires that the tag GENERATE_QHP is set to YES.
+
+QHG_LOCATION =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be
+# generated, together with the HTML files, they form an Eclipse help plugin. To
+# install this plugin and make it available under the help contents menu in
+# Eclipse, the contents of the directory containing the HTML and XML files needs
+# to be copied into the plugins directory of eclipse. The name of the directory
+# within the plugins directory should be the same as the ECLIPSE_DOC_ID value.
+# After copying Eclipse needs to be restarted before the help appears.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_ECLIPSEHELP = NO
+
+# A unique identifier for the Eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have this
+# name. Each documentation set should have its own identifier.
+# The default value is: org.doxygen.Project.
+# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES.
+
+ECLIPSE_DOC_ID = org.doxygen.Project
+
+# If you want full control over the layout of the generated HTML pages it might
+# be necessary to disable the index and replace it with your own. The
+# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top
+# of each HTML page. A value of NO enables the index and the value YES disables
+# it. Since the tabs in the index contain the same information as the navigation
+# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+DISABLE_INDEX = NO
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information. If the tag
+# value is set to YES, a side panel will be generated containing a tree-like
+# index structure (just like the one that is generated for HTML Help). For this
+# to work a browser that supports JavaScript, DHTML, CSS and frames is required
+# (i.e. any modern browser). Windows users are probably better off using the
+# HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can
+# further fine-tune the look of the index. As an example, the default style
+# sheet generated by doxygen has an example that shows how to put an image at
+# the root of the tree instead of the PROJECT_NAME. Since the tree basically has
+# the same information as the tab index, you could consider setting
+# DISABLE_INDEX to YES when enabling this option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+GENERATE_TREEVIEW = NO
+
+# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that
+# doxygen will group on one line in the generated HTML documentation.
+#
+# Note that a value of 0 will completely suppress the enum values from appearing
+# in the overview section.
+# Minimum value: 0, maximum value: 20, default value: 4.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+ENUM_VALUES_PER_LINE = 4
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used
+# to set the initial width (in pixels) of the frame in which the tree is shown.
+# Minimum value: 0, maximum value: 1500, default value: 250.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+TREEVIEW_WIDTH = 250
+
+# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to
+# external symbols imported via tag files in a separate window.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+EXT_LINKS_IN_WINDOW = NO
+
+# Use this tag to change the font size of LaTeX formulas included as images in
+# the HTML documentation. When you change the font size after a successful
+# doxygen run you need to manually remove any form_*.png images from the HTML
+# output directory to force them to be regenerated.
+# Minimum value: 8, maximum value: 50, default value: 10.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_FONTSIZE = 10
+
+# Use the FORMULA_TRANSPARENT tag to determine whether or not the images
+# generated for formulas are transparent PNGs. Transparent PNGs are not
+# supported properly for IE 6.0, but are supported on all modern browsers.
+#
+# Note that when changing this option you need to delete any form_*.png files in
+# the HTML output directory before the changes have effect.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+FORMULA_TRANSPARENT = YES
+
+# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see
+# https://www.mathjax.org) which uses client side Javascript for the rendering
+# instead of using pre-rendered bitmaps. Use this if you do not have LaTeX
+# installed or if you want the formulas to look prettier in the HTML output.
+# When
+# enabled you may also need to install MathJax separately and configure the path
+# to it using the MATHJAX_RELPATH option.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+USE_MATHJAX = NO
+
+# When MathJax is enabled you can set the default output format to be used for
+# the MathJax output. See the MathJax site (see:
+# http://docs.mathjax.org/en/latest/output.html) for more details.
+# Possible values are: HTML-CSS (which is slower, but has the best
+# compatibility), NativeMML (i.e. MathML) and SVG.
+# The default value is: HTML-CSS.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_FORMAT = HTML-CSS
+
+# When MathJax is enabled you need to specify the location relative to the HTML
+# output directory using the MATHJAX_RELPATH option. The destination directory
+# should contain the MathJax.js script. For instance, if the mathjax directory
+# is located at the same level as the HTML output directory, then
+# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax
+# Content Delivery Network so you can quickly see the result without installing
+# MathJax. However, it is strongly recommended to install a local copy of
+# MathJax from https://www.mathjax.org before deployment.
+# The default value is: https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/
+
+# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax
+# extension names that should be enabled during MathJax rendering. For example
+# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_EXTENSIONS =
+
+# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces
+# of code that will be used on startup of the MathJax code. See the MathJax site
+# (see: http://docs.mathjax.org/en/latest/output.html) for more details. For an
+# example see the documentation.
+# This tag requires that the tag USE_MATHJAX is set to YES.
+
+MATHJAX_CODEFILE =
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for
+# the HTML output. The underlying search engine uses javascript and DHTML and
+# should work on any modern browser. Note that when using HTML help
+# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET)
+# there is already a search function so this one should typically be disabled.
+# For large projects the javascript based search engine can be slow, then
+# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to
+# search using the keyboard; to jump to the search box use <access key> + S
+# (what the <access key> is depends on the OS and browser, but it is typically
+# <CTRL>, <ALT>/<option>, or both). Inside the search box use the <cursor down
+# key> to jump into the search results window, the results can be navigated
+# using the <cursor keys>. Press <Enter> to select an item or <escape> to
+# cancel the search. The filter options can be selected when the cursor is
+# inside the search box by pressing <Shift>+<cursor down>. Also here use the
+# <cursor keys> to select a filter and <Enter> or <escape> to activate or
+# cancel the filter option.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_HTML is set to YES.
+
+SEARCHENGINE = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be
+# implemented using a web server instead of a web client using Javascript. There
+# are two flavors of web server based searching depending on the EXTERNAL_SEARCH
+# setting. When disabled, doxygen will generate a PHP script for searching and
+# an index file used by the script. When EXTERNAL_SEARCH is enabled the indexing
+# and searching needs to be provided by external tools. See the section
+# "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SERVER_BASED_SEARCH = NO
+
+# When EXTERNAL_SEARCH tag is enabled doxygen will no longer generate the PHP
+# script for searching. Instead the search results are written to an XML file
+# which needs to be processed by an external indexer. Doxygen will invoke an
+# external search engine pointed to by the SEARCHENGINE_URL option to obtain the
+# search results.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: https://xapian.org/).
+#
+# See the section "External Indexing and Searching" for details.
+# The default value is: NO.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH = NO
+
+# The SEARCHENGINE_URL should point to a search engine hosted by a web server
+# which will return the search results when EXTERNAL_SEARCH is enabled.
+#
+# Doxygen ships with an example indexer (doxyindexer) and search engine
+# (doxysearch.cgi) which are based on the open source search engine library
+# Xapian (see: https://xapian.org/). See the section "External Indexing and
+# Searching" for details.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHENGINE_URL =
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the unindexed
+# search data is written to a file for indexing by an external tool. With the
+# SEARCHDATA_FILE tag the name of this file can be specified.
+# The default file is: searchdata.xml.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+SEARCHDATA_FILE = searchdata.xml
+
+# When SERVER_BASED_SEARCH and EXTERNAL_SEARCH are both enabled the
+# EXTERNAL_SEARCH_ID tag can be used as an identifier for the project. This is
+# useful in combination with EXTRA_SEARCH_MAPPINGS to search through multiple
+# projects and redirect the results back to the right project.
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTERNAL_SEARCH_ID =
+
+# The EXTRA_SEARCH_MAPPINGS tag can be used to enable searching through doxygen
+# projects other than the one defined by this configuration file, but that are
+# all added to the same external search index. Each project needs to have a
+# unique id set via EXTERNAL_SEARCH_ID. The search mapping then maps the id of
+# the project to a relative location where the documentation can be found. The
+# format is: EXTRA_SEARCH_MAPPINGS = tagname1=loc1 tagname2=loc2 ...
+# This tag requires that the tag SEARCHENGINE is set to YES.
+
+EXTRA_SEARCH_MAPPINGS =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES, doxygen will generate LaTeX output.
+# The default value is: YES.
+
+GENERATE_LATEX = YES
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_OUTPUT = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked.
+#
+# Note that when enabling USE_PDFLATEX this option is only used for generating
+# bitmaps for formulas in the HTML output, but not in the Makefile that is
+# written to the output directory.
+# The default file is: latex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to generate
+# index for LaTeX.
+# The default file is: makeindex.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES, doxygen generates more compact LaTeX
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used by the
+# printer.
+# Possible values are: a4 (210 x 297 mm), letter (8.5 x 11 inches), legal (8.5 x
+# 14 inches) and executive (7.25 x 10.5 inches).
+# The default value is: a4.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PAPER_TYPE = a4
+
+# The EXTRA_PACKAGES tag can be used to specify one or more LaTeX package names
+# that should be included in the LaTeX output. The package can be specified just
+# by its name or with the correct syntax as to be used with the LaTeX
+# \usepackage command. To get the times font for instance you can specify :
+# EXTRA_PACKAGES=times or EXTRA_PACKAGES={times}
+# To use the option intlimits with the amsmath package you can specify:
+# EXTRA_PACKAGES=[intlimits]{amsmath}
+# If left blank no extra packages will be included.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+EXTRA_PACKAGES = amsmath
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the
+# generated LaTeX document. The header should contain everything until the first
+# chapter. If it is left blank doxygen will generate a standard header. See
+# section "Doxygen usage" for information on how to let doxygen write the
+# default header to a separate file.
+#
+# Note: Only use a user-defined header if you know what you are doing! The
+# following commands have a special meaning inside the header: $title,
+# $datetime, $date, $doxygenversion, $projectname, $projectnumber,
+# $projectbrief, $projectlogo. Doxygen will replace $title with the empty
+# string, for the replacement values of the other commands the user is referred
+# to HTML_HEADER.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HEADER =
+
+# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the
+# generated LaTeX document. The footer should contain everything after the last
+# chapter. If it is left blank doxygen will generate a standard footer. See
+# LATEX_HEADER for more information on how to generate a default footer and what
+# special commands can be used inside the footer.
+#
+# Note: Only use a user-defined footer if you know what you are doing!
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_FOOTER =
+
+# The LATEX_EXTRA_STYLESHEET tag can be used to specify additional user-defined
+# LaTeX style sheets that are included after the standard style sheets created
+# by doxygen. Using this option one can overrule certain style aspects. Doxygen
+# will copy the style sheet files to the output directory.
+# Note: The order of the extra style sheet files is of importance (e.g. the last
+# style sheet in the list overrules the setting of the previous ones in the
+# list).
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_STYLESHEET =
+
+# The LATEX_EXTRA_FILES tag can be used to specify one or more extra images or
+# other source files which should be copied to the LATEX_OUTPUT output
+# directory. Note that the files will be copied as-is; there are no commands or
+# markers available.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_EXTRA_FILES =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated is
+# prepared for conversion to PDF (using ps2pdf or pdflatex). The PDF file will
+# contain links (just like the HTML output) instead of page references. This
+# makes the output suitable for online browsing using a PDF viewer.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+PDF_HYPERLINKS = YES
+
+# If the USE_PDFLATEX tag is set to YES, doxygen will use pdflatex to generate
+# the PDF file directly from the LaTeX files. Set this option to YES, to get a
+# higher quality PDF documentation.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+USE_PDFLATEX = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep running
+# if errors occur, instead of asking the user for help. This option is also used
+# when generating formulas in HTML.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BATCHMODE = NO
+
+# If the LATEX_HIDE_INDICES tag is set to YES then doxygen will not include the
+# index chapters (such as File Index, Compound Index, etc.) in the output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_HIDE_INDICES = NO
+
+# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source
+# code with syntax highlighting in the LaTeX output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_SOURCE_CODE = NO
+
+# The LATEX_BIB_STYLE tag can be used to specify the style to use for the
+# bibliography, e.g. plainnat, or ieeetr. See
+# https://en.wikipedia.org/wiki/BibTeX and \cite for more info.
+# The default value is: plain.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_BIB_STYLE = plain
+
+# If the LATEX_TIMESTAMP tag is set to YES then the footer of each generated
+# page will contain the date and time when the page was generated. Setting this
+# to NO can help when comparing the output of multiple runs.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_LATEX is set to YES.
+
+LATEX_TIMESTAMP = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES, doxygen will generate RTF output. The
+# RTF output is optimized for Word 97 and may not look too pretty with other RTF
+# readers/editors.
+# The default value is: NO.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: rtf.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_OUTPUT = rtf
+
+# If the COMPACT_RTF tag is set to YES, doxygen generates more compact RTF
+# documents. This may be useful for small projects and may help to save some
+# trees in general.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated will
+# contain hyperlink fields. The RTF file will contain links (just like the HTML
+# output) instead of page references. This makes the output suitable for online
+# browsing using Word or some other Word compatible readers that support those
+# fields.
+#
+# Note: WordPad (write) and others do not support links.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's config
+# file, i.e. a series of assignments. You only have to provide replacements,
+# missing definitions are set to their default value.
+#
+# See also section "Doxygen usage" for information on how to generate the
+# default style sheet that doxygen normally uses.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an RTF document. Syntax is
+# similar to doxygen's config file. A template extensions file can be generated
+# using doxygen -e rtf extensionFile.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_EXTENSIONS_FILE =
+
+# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code
+# with syntax highlighting in the RTF output.
+#
+# Note that which sources are shown also depends on other settings such as
+# SOURCE_BROWSER.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_RTF is set to YES.
+
+RTF_SOURCE_CODE = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES, doxygen will generate man pages for
+# classes and files.
+# The default value is: NO.
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it. A directory man3 will be created inside the directory specified by
+# MAN_OUTPUT.
+# The default directory is: man.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_OUTPUT = man
+
+# The MAN_EXTENSION tag determines the extension that is added to the generated
+# man pages. In case the manual section does not start with a number, the number
+# 3 is prepended. The dot (.) at the beginning of the MAN_EXTENSION tag is
+# optional.
+# The default value is: .3.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_EXTENSION = .3
+
+# The MAN_SUBDIR tag determines the name of the directory created within
+# MAN_OUTPUT in which the man pages are placed. It defaults to man followed by
+# MAN_EXTENSION with the initial . removed.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_SUBDIR =
+
+# If the MAN_LINKS tag is set to YES and doxygen generates man output, then it
+# will generate one additional man file for each entity documented in the real
+# man page(s). These additional files only source the real man page, but without
+# them the man command would be unable to find the correct page.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_MAN is set to YES.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES, doxygen will generate an XML file that
+# captures the structure of the code including all documentation.
+# The default value is: NO.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put. If a
+# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of
+# it.
+# The default directory is: xml.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_OUTPUT = xml
+
+# If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program
+# listings (including syntax highlighting and cross-referencing information) to
+# the XML output. Note that enabling this will significantly increase the size
+# of the XML output.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_XML is set to YES.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to the DOCBOOK output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_DOCBOOK tag is set to YES, doxygen will generate Docbook files
+# that can be used to generate PDF.
+# The default value is: NO.
+
+GENERATE_DOCBOOK = NO
+
+# The DOCBOOK_OUTPUT tag is used to specify where the Docbook pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be put in
+# front of it.
+# The default directory is: docbook.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_OUTPUT = docbook
+
+# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the
+# program listings (including syntax highlighting and cross-referencing
+# information) to the DOCBOOK output. Note that enabling this will significantly
+# increase the size of the DOCBOOK output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_DOCBOOK is set to YES.
+
+DOCBOOK_PROGRAMLISTING = NO
+
+#---------------------------------------------------------------------------
+# Configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES, doxygen will generate an
+# AutoGen Definitions (see http://autogen.sourceforge.net/) file that captures
+# the structure of the code including all documentation. Note that this feature
+# is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# Configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES, doxygen will generate a Perl module
+# file that captures the structure of the code including all documentation.
+#
+# Note that this feature is still experimental and incomplete at the moment.
+# The default value is: NO.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES, doxygen will generate the necessary
+# Makefile rules, Perl scripts and LaTeX code to be able to generate PDF and DVI
+# output from the Perl module output.
+# The default value is: NO.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES, the Perl module output will be nicely
+# formatted so it can be parsed by a human reader. This is useful if you want to
+# understand what is going on. On the other hand, if this tag is set to NO, the
+# size of the Perl module output will be much smaller and Perl will parse it
+# just the same.
+# The default value is: YES.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file are
+# prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. This is useful
+# so different doxyrules.make files included by the same Makefile don't
+# overwrite each other's variables.
+# This tag requires that the tag GENERATE_PERLMOD is set to YES.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES, doxygen will evaluate all
+# C-preprocessor directives found in the sources and include files.
+# The default value is: YES.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES, doxygen will expand all macro names
+# in the source code. If set to NO, only conditional compilation will be
+# performed. Macro expansion can be done in a controlled way by setting
+# EXPAND_ONLY_PREDEF to YES.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
+# the macro expansion is limited to the macros specified with the PREDEFINED and
+# EXPAND_AS_DEFINED tags.
+# The default value is: NO.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES, the include files in the
+# INCLUDE_PATH will be searched if a #include is found.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by the
+# preprocessor.
+# This tag requires that the tag SEARCH_INCLUDES is set to YES.
+
+INCLUDE_PATH =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will be
+# used.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that are
+# defined before the preprocessor is started (similar to the -D option of e.g.
+# gcc). The argument of the tag is a list of macros of the form: name or
+# name=definition (no spaces). If the definition and the "=" are omitted, "=1"
+# is assumed. To prevent a macro definition from being undefined via #undef or
+# recursively expanded use the := operator instead of the = operator.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+PREDEFINED = __DOXYGEN__
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
+# tag can be used to specify a list of macro names that should be expanded. The
+# macro definition that is found in the sources will be used. Use the PREDEFINED
+# tag if you want to use a different macro definition that overrules the
+# definition found in the source code.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES then doxygen's preprocessor will
+# remove all references to function-like macros that are alone on a line, have
+# an all uppercase name, and do not end with a semicolon. Such function macros
+# are typically used for boiler-plate code, and will confuse the parser if not
+# removed.
+# The default value is: YES.
+# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration options related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES tag can be used to specify one or more tag files. For each tag
+# file the location of the external documentation should be added. The format of
+# a tag file without this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where loc1 and loc2 can be relative or absolute paths or URLs. See the
+# section "Linking to external documentation" for more information about the use
+# of tag files.
+# Note: Each tag file must have a unique name (where the name does NOT include
+# the path). If a tag file is not located in the directory in which doxygen is
+# run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
+# tag file that is based on the input files it reads. See section "Linking to
+# external documentation" for more information about the usage of tag files.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES, all external class will be listed in
+# the class index. If set to NO, only the inherited external classes will be
+# listed.
+# The default value is: NO.
+
+ALLEXTERNALS = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES, all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will be
+# listed.
+# The default value is: YES.
+
+EXTERNAL_GROUPS = YES
+
+# If the EXTERNAL_PAGES tag is set to YES, all external pages will be listed in
+# the related pages index. If set to NO, only the current project's pages will
+# be listed.
+# The default value is: YES.
+
+EXTERNAL_PAGES = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of 'which perl').
+# The default file (with absolute path) is: /usr/bin/perl.
+
+PERL_PATH = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram
+# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to
+# NO turns the diagrams off. Note that this option also works with HAVE_DOT
+# disabled, but it is recommended to install and use dot, since it yields more
+# powerful graphs.
+# The default value is: YES.
+
+CLASS_DIAGRAMS = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see:
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH =
+
+# You can include diagrams made with dia in doxygen documentation. Doxygen will
+# then run dia to produce the diagram and insert it in the documentation. The
+# DIA_PATH tag allows you to specify the directory where the dia binary resides.
+# If left empty dia is assumed to be found in the default search path.
+
+DIA_PATH =
+
+# If set to YES the inheritance and collaboration graphs will hide inheritance
+# and usage relations if the target is undocumented or is not a class.
+# The default value is: YES.
+
+HIDE_UNDOC_RELATIONS = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz (see:
+# http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent
+# Bell Labs. The other options in this section have no effect if this option is
+# set to NO
+# The default value is: NO.
+
+HAVE_DOT = NO
+
+# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is allowed
+# to run in parallel. When set to 0 doxygen will base this on the number of
+# processors available in the system. You can set it explicitly to a value
+# larger than 0 to get control over the balance between CPU load and processing
+# speed.
+# Minimum value: 0, maximum value: 32, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_NUM_THREADS = 0
+
+# When you want a differently looking font in the dot files that doxygen
+# generates you can specify the font name using DOT_FONTNAME. You need to make
+# sure dot is able to find the font, which can be done by putting it in a
+# standard location or by setting the DOTFONTPATH environment variable or by
+# setting DOT_FONTPATH to the directory containing the font.
+# The default value is: Helvetica.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTNAME = Helvetica
+
+# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of
+# dot graphs.
+# Minimum value: 4, maximum value: 24, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTSIZE = 10
+
+# By default doxygen will tell dot to use the default font as specified with
+# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set
+# the path where dot can find it using this tag.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_FONTPATH =
+
+# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for
+# each documented class showing the direct and indirect inheritance relations.
+# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a
+# graph for each documented class showing the direct and indirect implementation
+# dependencies (inheritance, containment, and class references variables) of the
+# class with other documented classes.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+COLLABORATION_GRAPH = YES
+
+# If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for
+# groups, showing the direct groups dependencies.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GROUP_GRAPHS = YES
+
+# If the UML_LOOK tag is set to YES, doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LOOK = NO
+
+# If the UML_LOOK tag is enabled, the fields and methods are shown inside the
+# class node. If there are many fields or methods and many nodes the graph may
+# become too big to be useful. The UML_LIMIT_NUM_FIELDS threshold limits the
+# number of items for each type to make the size more manageable. Set this to 0
+# for no limit. Note that the threshold may be exceeded by 50% before the limit
+# is enforced. So when you set the threshold to 10, up to 15 fields may appear,
+# but if the number exceeds 15, the total amount of fields shown is limited to
+# 10.
+# Minimum value: 0, maximum value: 100, default value: 10.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+UML_LIMIT_NUM_FIELDS = 10
+
+# If the TEMPLATE_RELATIONS tag is set to YES then the inheritance and
+# collaboration graphs will show the relations between templates and their
+# instances.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+TEMPLATE_RELATIONS = NO
+
+# If the INCLUDE_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are set to
+# YES then doxygen will generate a graph for each documented file showing the
+# direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDE_GRAPH = YES
+
+# If the INCLUDED_BY_GRAPH, ENABLE_PREPROCESSING and SEARCH_INCLUDES tags are
+# set to YES then doxygen will generate a graph for each documented file showing
+# the direct and indirect include dependencies of the file with other documented
+# files.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH tag is set to YES then doxygen will generate a call
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable call graphs for selected
+# functions only using the \callgraph command. Disabling a call graph can be
+# accomplished by means of the command \hidecallgraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALL_GRAPH = NO
+
+# If the CALLER_GRAPH tag is set to YES then doxygen will generate a caller
+# dependency graph for every global function or class method.
+#
+# Note that enabling this option will significantly increase the time of a run.
+# So in most cases it will be better to enable caller graphs for selected
+# functions only using the \callergraph command. Disabling a caller graph can be
+# accomplished by means of the command \hidecallergraph.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+CALLER_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY tag is set to YES then doxygen will graphical
+# hierarchy of all classes instead of a textual one.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GRAPHICAL_HIERARCHY = YES
+
+# If the DIRECTORY_GRAPH tag is set to YES then doxygen will show the
+# dependencies a directory has on other directories in a graphical way. The
+# dependency relations are determined by the #include relations between the
+# files in the directories.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DIRECTORY_GRAPH = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. For an explanation of the image formats see the section
+# output formats in the documentation of the dot tool (Graphviz (see:
+# http://www.graphviz.org/)).
+# Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order
+# to make the SVG files visible in IE 9+ (other browsers do not have this
+# requirement).
+# Possible values are: png, jpg, gif, svg, png:gd, png:gd:gd, png:cairo,
+# png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and
+# png:gdiplus:gdiplus.
+# The default value is: png.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_IMAGE_FORMAT = png
+
+# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to
+# enable generation of interactive SVG images that allow zooming and panning.
+#
+# Note that this requires a modern browser other than Internet Explorer. Tested
+# and working are Firefox, Chrome, Safari, and Opera.
+# Note: For IE 9+ you need to set HTML_FILE_EXTENSION to xhtml in order to make
+# the SVG files visible. Older versions of IE do not have SVG support.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+INTERACTIVE_SVG = NO
+
+# The DOT_PATH tag can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_PATH =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the \dotfile
+# command).
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOTFILE_DIRS =
+
+# The MSCFILE_DIRS tag can be used to specify one or more directories that
+# contain msc files that are included in the documentation (see the \mscfile
+# command).
+
+MSCFILE_DIRS =
+
+# The DIAFILE_DIRS tag can be used to specify one or more directories that
+# contain dia files that are included in the documentation (see the \diafile
+# command).
+
+DIAFILE_DIRS =
+
+# When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the
+# path where java can find the plantuml.jar file. If left blank, it is assumed
+# PlantUML is not used or called during a preprocessing step. Doxygen will
+# generate a warning when it encounters a \startuml command in this case and
+# will not generate output for the diagram.
+
+PLANTUML_JAR_PATH =
+
+# When using plantuml, the PLANTUML_CFG_FILE tag can be used to specify a
+# configuration file for plantuml.
+
+PLANTUML_CFG_FILE =
+
+# When using plantuml, the specified paths are searched for files specified by
+# the !include statement in a plantuml block.
+
+PLANTUML_INCLUDE_PATH =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of nodes
+# that will be shown in the graph. If the number of nodes in a graph becomes
+# larger than this value, doxygen will truncate the graph, which is visualized
+# by representing a node as a red box. Note that doxygen if the number of direct
+# children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note that
+# the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+# Minimum value: 0, maximum value: 10000, default value: 50.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_GRAPH_MAX_NODES = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the graphs
+# generated by dot. A depth value of 3 means that only nodes reachable from the
+# root by following a path via at most 3 edges will be shown. Nodes that lay
+# further from the root node will be omitted. Note that setting this option to 1
+# or 2 may greatly reduce the computation time needed for large code bases. Also
+# note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+# Minimum value: 0, maximum value: 1000, default value: 0.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not seem
+# to support this out of the box.
+#
+# Warning: Depending on the platform used, enabling this option may lead to
+# badly anti-aliased labels on the edges of a graph (i.e. they become hard to
+# read).
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_TRANSPARENT = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10) support
+# this, this feature is disabled by default.
+# The default value is: NO.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_MULTI_TARGETS = YES
+
+# If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page
+# explaining the meaning of the various boxes and arrows in the dot generated
+# graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate dot
+# files that are used to generate the various graphs.
+# The default value is: YES.
+# This tag requires that the tag HAVE_DOT is set to YES.
+
+DOT_CLEANUP = YES
diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt
index 319bdf093..5b5bd7f87 100644
--- a/include/CMakeLists.txt
+++ b/include/CMakeLists.txt
@@ -75,6 +75,7 @@ install( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/graphblas/base/"
add_library( alp_utils_headers INTERFACE )
target_include_directories( alp_utils_headers INTERFACE
$
+ $
)
# copy utils headers
@@ -107,6 +108,11 @@ install( TARGETS backend_headers_nodefs EXPORT GraphBLASTargets
install( TARGETS alp_utils_headers EXPORT GraphBLASTargets
INCLUDES DESTINATION "${INCLUDE_INSTALL_DIR}"
)
+if( WITH_ALP_REFERENCE_BACKEND_HEADERS OR
+ WITH_ALP_OMP_BACKEND_HEADERS
+)
+ add_subdirectory( alp )
+endif()
if( WITH_REFERENCE_BACKEND_HEADERS )
add_library( backend_reference_headers INTERFACE )
diff --git a/include/alp.hpp b/include/alp.hpp
new file mode 100644
index 000000000..3ebc36637
--- /dev/null
+++ b/include/alp.hpp
@@ -0,0 +1,177 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifdef __DOXYGEN__
+
+/**
+ * Define this macro to disable libnuma use.
+ */
+#define _ALP_NO_LIBNUMA
+
+/**
+ * Define this macro to disable thread pinning.
+ */
+#define _ALP_NO_PINNING
+
+/**
+ * Define this macro to compile with PlatformBSP support.
+ */
+#define _ALP_WITH_LPF
+
+/**
+ * Which GraphBLAS backend should be default.
+ *
+ * Known single user-process options:
+ * -# reference
+ * -# reference_omp
+ *
+ * Known multiple user-process options:
+ * -# BSP1D
+ */
+#define _ALP_BACKEND reference
+
+/**
+ * Which GraphBLAS backend the BSP1D backend should use within a single user
+ * process. For possible values, see the single user process options for
+ * #_ALP_BACKEND.
+ */
+#define _ALP_BSP1D_BACKEND
+
+/**
+ * \mainpage Algebraic Programming (ALP) API Specification.
+ *
+ * This document specifies the ALP API.
+ *
+ * \par Containers
+ *
+ * ALP defines the following containers for users to interface with:
+ * -# alp::Scalar
+ * -# alp::Vector
+ * -# alp::Matrix
+ *
+ * Containers take as a template argument \a T the type that the container
+ * stores. The type \a T can be any C++ plain-old-data type.
+ *
+ * ALP defines primitives for performing IO to and from containers in the
+ * \ref IO module.
+ *
+ * \par Algebraic structures
+ *
+ * ALP defines the following algebraic structures to interface with:
+ * -# All binary operators defined in alp::operators;
+ * -# identities defined in alp::identities;
+ * -# alp::Monoid structures by combining binary operators and identities;
+ * -# alp::Semiring structures by combining two operators and two identities.
+ *
+ * For example, a real semiring is composed as follows:
+ * \code
+ * alp::Semiring<
+ * alp::operators::add< double >, alp::operators::mul< double >,
+ * alp::identities::zero, alp::identities::one
+ * > reals;
+ * \endcode
+ * This semiring forms the basis of most numerical linear algebra.
+ *
+ * Our definition of monoid and semirings imply that the domains they operate
+ * over are derived from the operators. For example, to perform half precision
+ * multiplication and accumulate in single precision, the following semiring
+ * may be defined:
+ * \code
+ * alp::Semiring<
+ * alp::operators::add< short float, float, float >,
+ * alp::operators::mul< short float >,
+ * alp::identities::zero, alp::identities::one
+ * > mixedReals;
+ * \endcode
+ *
+ * \par Algebraic primitives
+ *
+ * Operations on containers proceed by calling ALP primitives, which are
+ * parametrised in the algebraic structure the operation should proceed with.
+ * Primitives are grouped in modules that follow roughly the traditional BLAS
+ * taxonomy:
+ * -# \ref BLAS0
+ * -# \ref BLAS1
+ * -# \ref BLAS2
+ * -# \ref BLAS3
+ *
+ * \par Algebraic structures and views
+ *
+ * Containers may have structures (e.g., symmetric) and views (e.g., transpose),
+ * and may be sparse or dense as per alp::Density. Operations are in principle
+ * defined for both sparse \em and dense containers, as well as mixtures of
+ * sparse and dense containers, provided that the right algebraic structures are
+ * provided -- for example, a sparse vector cannot be reduced into a scalar via
+ * alp::foldl when an (associative) operator is given; instead, a monoid
+ * structure is required in order to interpret any missing values in a sparse
+ * vector.
+ *
+ * Views allow for the selection of submatrices from a larger matrix, such as
+ * for example necessary to express Cholesky factorisation algorithms. Views are
+ * constructed through alp::get_view. Please see the slides for concrete
+ * examples.
+ */
+#endif
+
+#ifndef _H_ALP
+#define _H_ALP
+
+// do NOT remove this #if, in order to protect this header from
+// clang-format re-ordering
+#if 1
+// load active configuration
+ #include //defines _ALP_BACKEND
+#endif
+
+// #pragma message "Included ALP.hpp"
+
+// collects the user-level includes
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _ALP_BACKEND
+// #pragma message "_ALP_BACKEND defined"
+// include also the main data types in order to have the default definitions
+// but ONLY if a default backend is defined; otherwise, the previous headers
+// contain the relevant definitions (without defaults)
+ #include
+ #include
+ #include
+#endif
+
+#endif // end ``_H_ALP''
+
diff --git a/include/alp/CMakeLists.txt b/include/alp/CMakeLists.txt
new file mode 100644
index 000000000..d763bf935
--- /dev/null
+++ b/include/alp/CMakeLists.txt
@@ -0,0 +1,156 @@
+#
+# Copyright 2021 Huawei Technologies Co., Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+#
+# Definition of GraphBLAS include targets: all targets here defined
+# are interface targets for headers and basic definitions required to build
+# GraphBLAS backends and tests. Importing targets have all basic dependencies
+# and definitions to compile against each backend, but MUST explicitly
+# set a default backend (if they want to do so).
+#
+assert_defined_variables( ALP_REFERENCE_INCLUDE_DEFS WITH_ALP_REFERENCE_BACKEND_HEADERS )
+assert_defined_variables( ALP_OMP_INCLUDE_DEFS WITH_ALP_OMP_BACKEND_HEADERS )
+assert_defined_variables( ALP_DISPATCH_INCLUDE_DEFS WITH_ALP_DISPATCH_BACKEND_HEADERS )
+assert_valid_variables( INCLUDE_INSTALL_DIR )
+
+# to avoid flaky acrobatics with regex or glob expressions, copy main files directly
+install( FILES "../alp.hpp" DESTINATION "${INCLUDE_INSTALL_DIR}" )
+set( root_files
+ "../alp.hpp"
+ "backends.hpp"
+ "blas0.hpp"
+ "blas1.hpp"
+ "blas2.hpp"
+ "blas3.hpp"
+ "config.hpp"
+ "density.hpp"
+ "descriptors.hpp"
+ "exec.hpp"
+ "identities.hpp"
+ "imf.hpp"
+ "init.hpp"
+ "internalops.hpp"
+ "io.hpp"
+ "iomode.hpp"
+ "matrix.hpp"
+ "monoid.hpp"
+ "ops.hpp"
+ "phase.hpp"
+ "rc.hpp"
+ "rels.hpp"
+ "scalar.hpp"
+ "semiring.hpp"
+ "storage.hpp"
+ "structures.hpp"
+ "type_traits.hpp"
+ "utils.hpp"
+ "vector.hpp"
+ "views.hpp"
+)
+set( ALP_INCLUDE_INSTALL_DIR "${INCLUDE_INSTALL_DIR}/alp")
+install( FILES ${root_files} DESTINATION "${ALP_INCLUDE_INSTALL_DIR}" )
+
+# copy base headers and all its subdirectories (if any)
+# note: leave the slash "/" at the end of the DIRECTORY path,
+# otherwise CMake will create a "graphblas/base" directory inside DESTINATION !!!
+# https://cmake.org/cmake/help/latest/command/install.html#installing-directories
+install( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/base/"
+ DESTINATION "${ALP_INCLUDE_INSTALL_DIR}/base"
+ FILES_MATCHING REGEX "${HEADERS_REGEX}"
+)
+
+# copy utils headers
+install( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/utils/"
+ DESTINATION "${ALP_INCLUDE_INSTALL_DIR}/utils"
+ FILES_MATCHING REGEX "${HEADERS_REGEX}"
+)
+
+if( WITH_ALP_REFERENCE_BACKEND_HEADERS )
+ add_library( backend_alp_reference_headers INTERFACE )
+ target_link_libraries( backend_alp_reference_headers INTERFACE backend_headers_nodefs )
+ target_compile_definitions( backend_alp_reference_headers INTERFACE "${ALP_REFERENCE_INCLUDE_DEFS}" )
+
+ install( TARGETS backend_alp_reference_headers EXPORT GraphBLASTargets )
+endif()
+
+if( WITH_ALP_REFERENCE_BACKEND )
+ install( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/alp/reference"
+ DESTINATION "${ALP_INCLUDE_INSTALL_DIR}/alp_reference"
+ FILES_MATCHING REGEX "${HEADERS_REGEX}"
+ )
+endif()
+
+if( WITH_ALP_DISPATCH_BACKEND_HEADERS OR ( WITH_ALP_OMP_BACKEND_HEADERS AND ( "${_ALP_SECONDARY_BACKEND}" EQUAL "dispatch" ) ) )
+ install( FILES "${CMAKE_BINARY_DIR}/blas_wrapper/alp_blas.h" DESTINATION "${INCLUDE_INSTALL_DIR}/blas_wrapper" )
+
+ install(
+ TARGETS cblas EXPORT GraphBLASTargets
+ INCLUDES DESTINATION "${INCLUDE_INSTALL_DIR}/blas_wrapper"
+ )
+endif()
+
+
+if( WITH_ALP_DISPATCH_BACKEND_HEADERS )
+ add_library( backend_alp_dispatch_headers INTERFACE )
+ target_link_libraries( backend_alp_dispatch_headers INTERFACE backend_headers_nodefs )
+ target_compile_definitions( backend_alp_dispatch_headers INTERFACE "${ALP_DISPATCH_INCLUDE_DEFS}" )
+ target_link_libraries( backend_alp_dispatch_headers INTERFACE cblas )
+
+ install( TARGETS backend_alp_dispatch_headers EXPORT GraphBLASTargets )
+endif()
+
+if( WITH_ALP_DISPATCH_BACKEND )
+ install( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/dispatch"
+ DESTINATION "${ALP_INCLUDE_INSTALL_DIR}/alp_dispatch"
+ FILES_MATCHING REGEX "${HEADERS_REGEX}"
+ )
+endif()
+
+if( WITH_ALP_OMP_BACKEND_HEADERS )
+ add_library( backend_alp_omp_headers INTERFACE )
+ target_link_libraries( backend_alp_omp_headers INTERFACE backend_headers_nodefs )
+ target_link_libraries( backend_alp_omp_headers INTERFACE OpenMP::OpenMP_CXX )
+ target_link_libraries( backend_alp_omp_headers INTERFACE cblas )
+ target_compile_definitions( backend_alp_omp_headers INTERFACE "${ALP_OMP_INCLUDE_DEFS}" )
+
+ install( TARGETS backend_alp_omp_headers EXPORT GraphBLASTargets )
+endif()
+
+if( WITH_ALP_OMP_BACKEND )
+ install( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/alp/omp"
+ DESTINATION "${ALP_INCLUDE_INSTALL_DIR}/alp_omp"
+ FILES_MATCHING REGEX "${HEADERS_REGEX}"
+ )
+endif()
+
+# this target lists the algorithms implemented on top of the generic functionalities,
+# hence it depends only on backend_headers_nodefs
+add_library( alp_algorithms INTERFACE )
+target_link_libraries( alp_algorithms INTERFACE backend_headers_nodefs )
+
+target_include_directories(
+ alp_algorithms INTERFACE
+
+ $
+ $
+)
+
+install( DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/algorithms/"
+ DESTINATION "${ALP_INCLUDE_INSTALL_DIR}/algorithms"
+ FILES_MATCHING REGEX "${HEADERS_REGEX}"
+)
+
+install( TARGETS alp_algorithms EXPORT GraphBLASTargets )
diff --git a/include/alp/algorithms/backsubstitution.hpp b/include/alp/algorithms/backsubstitution.hpp
new file mode 100644
index 000000000..e4c266687
--- /dev/null
+++ b/include/alp/algorithms/backsubstitution.hpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+
+#include
+#ifdef DEBUG
+#include "../../../tests/utils/print_alp_containers.hpp"
+#endif
+
+namespace alp {
+
+ namespace algorithms {
+
+ /**
+ * Solves linear system Ax=b
+ * where A is UpperTriangular matrix, b is given RHS vector
+ * and x is the solution.
+ *
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @tparam Minus Type of minus operator used in the computation
+ * @tparam Divide Type of divide operator used in the computation
+ * @param[in] A input upper triangular matrix
+ * @param[in] b input RHS vector
+ * @param[out] x solution vector
+ * @param[in] ring The semiring used in the computation
+ * @return RC SUCCESS if the execution was correct
+ *
+ */
+ template<
+ typename D = double,
+ typename View,
+ typename ImfR,
+ typename ImfC,
+ typename Vecx,
+ typename Vecb,
+ typename Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ typename Minus = operators::subtract< D >,
+ typename Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_vector< Vecx >::value &&
+ is_vector< Vecb >::value
+ > * = nullptr
+ >
+ RC backsubstitution(
+ Matrix< D, structures::UpperTriangular, Dense, View, ImfR, ImfC > &A,
+ Vecx &x,
+ Vecb &b,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+ const Divide ÷ = Divide()
+ ) {
+
+ RC rc = SUCCESS;
+
+ if( ( nrows( A ) != size( x ) ) || ( size( b ) != size( x ) ) ) {
+ std::cerr << "Incompatible sizes in trsv.\n";
+ return FAILED;
+ }
+
+ const size_t n = nrows( A );
+
+ for( size_t k = 0; k < n ; ++k ) {
+ Scalar< D > alpha( ring.template getZero< D >() );
+ const size_t i = n - k - 1;
+ //x[i]=(b[i]-A[i,i:].dot(x[i:]))/A[i,i]
+ auto A_i = get_view( A, i, utils::range( i, n ) );
+ auto A_ii = get_view( A, i, utils::range( i, i + 1 ) );
+ auto x_i = get_view( x, utils::range( i, i + 1 ) );
+ auto b_i = get_view( b, utils::range( i, i + 1 ) );
+ auto x_i_n = get_view( x, utils::range( i, n ) );
+ rc = rc ? rc : alp::dot( alpha, A_i, alp::conjugate( x_i_n ), ring );
+ rc = rc ? rc : alp::set( x_i, b_i );
+ rc = rc ? rc : alp::foldl( x_i, alpha, minus );
+ rc = rc ? rc : alp::set( alpha, Scalar< D >( ring.template getZero< D >() ) );
+ rc = rc ? rc : alp::foldl( alpha, A_ii, ring.getAdditiveMonoid() );
+ rc = rc ? rc : alp::foldl( x_i, alpha, divide );
+ }
+
+ return rc;
+ }
+
+ template<
+ typename D = double,
+ typename ViewA,
+ typename ImfRA,
+ typename ImfCA,
+ typename StructX,
+ typename ViewX,
+ typename ImfRX,
+ typename ImfCX,
+ typename StructB,
+ typename ViewB,
+ typename ImfRB,
+ typename ImfCB,
+ typename Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ typename Minus = operators::subtract< D >,
+ typename Divide = operators::divide< D >
+ >
+ RC backsubstitution(
+ Matrix< D, structures::UpperTriangular, Dense, ViewA, ImfRA, ImfCA > &A,
+ Matrix< D, StructX, Dense, ViewX, ImfRX, ImfCX > &X,
+ Matrix< D, StructB, Dense, ViewB, ImfRB, ImfCB > &B,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+ const Divide ÷ = Divide()
+ ) {
+
+ RC rc = SUCCESS;
+
+ if (
+ ( nrows( X ) != nrows( B ) ) ||
+ ( ncols( X ) != ncols( B ) ) ||
+ ( ncols( A ) != nrows( X ) )
+ ) {
+ std::cerr << "Incompatible sizes in trsm.\n";
+ return FAILED;
+ }
+
+ const size_t m = nrows( X );
+ const size_t n = ncols( X );
+
+ for( size_t i = 0; i < n ; ++i ) {
+ auto x = get_view( X, utils::range( 0, m ), i );
+ auto b = get_view( B, utils::range( 0, m ), i );
+ rc = rc ? rc : algorithms::backsubstitution( A, x, b, ring, minus, divide );
+ }
+
+ assert( rc == SUCCESS );
+ return rc;
+ }
+
+ } // namespace algorithms
+} // namespace alp
diff --git a/include/alp/algorithms/cholesky.hpp b/include/alp/algorithms/cholesky.hpp
new file mode 100644
index 000000000..124e63319
--- /dev/null
+++ b/include/alp/algorithms/cholesky.hpp
@@ -0,0 +1,495 @@
+/*
+ * Copyright 2022 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+
+#include
+#include // use from grb
+#include
+#include
+#include "../../../tests/utils/print_alp_containers.hpp"
+
+namespace alp {
+
+ namespace algorithms {
+
+ /**
+ * Computes the Cholesky decomposition U^TU = H of a real symmetric
+ * positive definite (SPD) (or complex Hermitian positive definite)
+ * matrix H where \a U is upper triangular.
+ *
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @tparam Minus Type minus operator used in the computation
+ * @tparam Divide Type of divide operator used in the computation
+ * @param[out] U output upper triangular matrix
+ * @param[in] H input real symmetric positive definite matrix
+ * @param[in] ring The semiring used in the computation
+ * @return RC SUCCESS if the execution was correct
+ *
+ */
+ template<
+ typename MatU,
+ typename MatH,
+ typename D = typename MatU::value_type,
+ typename Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ typename Minus = operators::subtract< D >,
+ typename Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_matrix< MatU >::value &&
+ is_matrix< MatH >::value &&
+ structures::is_a< typename MatU::structure, structures::UpperTriangular >::value &&
+			// TODO: structures::SymmetricPositiveDefinite should be replaced
+ // with structures::SymmetricPositiveDefinitePositiveDefinite
+ (
+ (
+ !grb::utils::is_complex< D >::value &&
+ structures::is_a< typename MatH::structure, structures::SymmetricPositiveDefinite >::value
+ ) || (
+ grb::utils::is_complex< D >::value &&
+ structures::is_a< typename MatH::structure, structures::HermitianPositiveDefinite >::value
+ )
+ ) &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value
+ > * = nullptr
+ >
+ RC cholesky_uptr(
+ MatU &U,
+ const MatH &H,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+			const Divide &divide = Divide()
+ ) {
+#ifdef DEBUG
+ std::cout << "Entered cholesky_uptr out-of-place non-blocked version.\n";
+#endif
+ RC rc = SUCCESS;
+
+ if(
+ ( nrows( U ) != nrows( H ) ) ||
+ ( ncols( U ) != ncols( H ) )
+ ) {
+ std::cerr << "Incompatible sizes in cholesky_uptr.\n";
+ return FAILED;
+ }
+
+ const size_t n = nrows( H );
+
+ // Out of place specification of the operation
+ Matrix< D, typename MatH::structure > UU( n );
+ rc = rc ? rc : set( UU, H );
+
+#ifdef DEBUG
+ if( rc != SUCCESS ) {
+ std::cerr << " set( UU, H ) failed\n";
+ return rc;
+ }
+ print_matrix( " -- UU -- " , UU );
+#endif
+
+ for( size_t k = 0; k < n; ++k ) {
+#ifdef DEBUG
+ std::cout << "============ Iteration " << k << " ============" << std::endl;
+#endif
+
+ auto a = get_view( UU, k, utils::range( k, n ) );
+#ifdef DEBUG
+ print_vector( " -- a -- " , a );
+#endif
+
+ // U[ k, k ] = alpha = sqrt( UU[ k, k ] )
+ Scalar< D > alpha;
+ rc = rc ? rc : eWiseLambda(
+ [ &alpha, &ring ]( const size_t i, D &val ) {
+ if( i == 0 ) {
+ (void) set( alpha, alp::Scalar< D >( std::sqrt( val ) ) );
+ val = *alpha;
+ }
+ },
+ a
+ );
+
+#ifdef DEBUG
+ std::cout << "alpha " << *alpha << std::endl;
+ if( rc != SUCCESS ) {
+ std::cerr << " eWiseLambda( lambda, view ) (0) failed\n";
+ return rc;
+ }
+#endif
+
+ auto v = get_view( UU, k, utils::range( k + 1, n ) );
+#ifdef DEBUG
+ print_vector( " -- v -- " , v );
+#endif
+ // UU[ k + 1: , k ] = UU[ k + 1: , k ] / alpha
+ rc = rc ? rc : foldl( v, alpha, divide );
+
+#ifdef DEBUG
+ if( rc != SUCCESS ) {
+ std::cerr << " eWiseLambda( lambda, view ) (1) failed\n";
+ return rc;
+ }
+#endif
+
+ // UU[ k+1: , k+1: ] -= v*v^T
+ auto Uprim = get_view( UU, utils::range( k + 1, n ), utils::range( k + 1, n ) );
+
+ auto vstar = conjugate( v );
+ auto vvt = outer( vstar, ring.getMultiplicativeOperator() );
+#ifdef DEBUG
+ print_vector( " -- v -- " , v );
+ print_matrix( " vvt ", vvt );
+#endif
+ rc = rc ? rc : foldl( Uprim, vvt, minus );
+#ifdef DEBUG
+ if( rc != SUCCESS ) {
+ std::cerr << " eWiseLambda( lambda, view ) (2) failed\n";
+ return rc;
+ }
+#endif
+ }
+
+ // Finally collect output into U matrix and return
+ for( size_t k = 0; k < n; ++k ) {
+
+ // U[ k, k: ] = UU[ k, k: ]
+ auto vU = get_view( U, k, utils::range( k, n ) );
+ auto vUU = get_view( UU, k, utils::range( k, n ) );
+
+ rc = set( vU, vUU );
+#ifdef DEBUG
+ if( rc != SUCCESS ) {
+ std::cerr << " set( view, view ) failed\n";
+ return rc;
+ }
+#endif
+ }
+
+ return rc;
+ }
+
+ /**
+ * Computes the blocked version Cholesky decomposition U^TU = H of a real symmetric
+ * positive definite (SPD) matrix H where \a U is upper triangular.
+		 * U^T is equivalent to transpose(U)
+ *
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @tparam Minus Type minus operator used in the computation
+ * @param[out] U output upper triangular matrix
+ * @param[in] H input real symmetric positive definite matrix
+ * @param[in] ring The semiring used in the computation
+ * @return RC SUCCESS if the execution was correct
+ *
+ */
+ template<
+ typename MatU,
+ typename MatH,
+ typename D = typename MatU::value_type,
+ typename Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ typename Minus = operators::subtract< D >,
+ std::enable_if_t<
+ is_matrix< MatU >::value &&
+ is_matrix< MatH >::value &&
+ structures::is_a< typename MatU::structure, structures::UpperTriangular >::value &&
+ structures::is_a< typename MatH::structure, structures::SymmetricPositiveDefinite >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value
+ > * = nullptr
+ >
+ RC cholesky_uptr_blk(
+ MatU &U,
+ const MatH &H,
+ const size_t &bs,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus()
+ ) {
+#ifdef DEBUG
+ std::cout << "Entered cholesky_upr out-of-place blocked version.\n";
+#endif
+ const Scalar< D > zero( ring.template getZero< D >() );
+
+ if(
+ ( nrows( U ) != nrows( H ) ) ||
+ ( ncols( U ) != ncols( H ) )
+ ) {
+ std::cerr << "Incompatible sizes in cholesky_uptr_blk.\n";
+ return FAILED;
+ }
+
+ RC rc = SUCCESS;
+
+ const size_t n = nrows( U );
+
+ Matrix< D, typename MatH::structure > UU( n );
+ rc = rc ? rc : set( UU, H );
+#ifdef DEBUG
+ if( rc != SUCCESS ) {
+ std::cout << "set failed\n";
+ return rc;
+ }
+#endif
+
+ //nb: number of blocks of (max) size bz
+ if( ( bs == 0 ) || ( bs > n ) ) {
+ std::cerr << "Block size has illegal value, bs = " << bs << " .\n";
+ std::cerr << "It should be from interval < 0, " << n << "] .\n";
+ return FAILED;
+ }
+ size_t nb = n / bs;
+ if( n % bs != 0 ){
+ nb = nb + 1;
+ }
+
+
+ for( size_t i = 0; i < nb; ++i ) {
+ const size_t a = std::min( i * bs, n );
+ const size_t b = std::min( ( i + 1 ) * bs, n );
+ const size_t c = n;
+
+ const utils::range range1( a, b );
+ const utils::range range2( b, c );
+
+ auto A11 = get_view( UU, range1, range1 );
+
+ // for complex we should conjugate A12
+ auto A12 = get_view< structures::General >( UU, range1, range2 );
+
+ //A11=cholesky(A11)
+ auto A11_out = get_view( U, range1, range1 );
+
+ rc = rc ? rc : cholesky_uptr( A11_out, A11, ring );
+#ifdef DEBUG
+ if( rc != SUCCESS ) {
+ std::cout << "cholesky_uptr failed\n";
+ return rc;
+ }
+#endif
+
+ auto A12_out = get_view< structures::General >( U, range1, range2 );
+ auto A11_out_T = get_view< alp::view::transpose >( A11_out );
+
+ rc = rc ? rc : algorithms::forwardsubstitution(
+ A11_out_T,
+ A12_out,
+ A12,
+ ring
+ );
+#ifdef DEBUG
+ if( rc != SUCCESS ) {
+ std::cout << "Forwardsubstitution failed\n";
+ return rc;
+ }
+#endif
+
+ Matrix< D, typename MatH::structure > Reflector( ncols( A12_out ) );
+ rc = rc ? rc : set( Reflector, zero );
+ rc = rc ? rc : mxm( Reflector, get_view< alp::view::transpose >( A12_out ), A12_out, ring );
+
+ auto A22 = get_view( UU, range2, range2 );
+ rc = rc ? rc : foldl( A22, Reflector, minus );
+ }
+
+ return rc;
+ }
+
+ /** inplace non-blocked versions, part below diagonal is not modified */
+ template<
+ typename MatU,
+ typename D = typename MatU::value_type,
+ typename Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ typename Minus = operators::subtract< D >,
+ typename Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_matrix< MatU >::value &&
+ structures::is_a< typename MatU::structure, structures::Square >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value
+ > * = nullptr
+ >
+ RC cholesky_uptr(
+ MatU &U,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+			const Divide &divide = Divide()
+ ) {
+#ifdef DEBUG
+ std::cout << "Entered cholesky_upr in-place non-blocked version.\n";
+#endif
+ const Scalar< D > zero( ring.template getZero< D >() );
+
+ RC rc = SUCCESS;
+
+ const size_t n = nrows( U );
+
+ for( size_t k = 0; k < n; ++k ) {
+#ifdef DEBUG
+ std::cout << "============ Iteration " << k << " ============" << std::endl;
+#endif
+
+ auto a = get_view( U, k, utils::range( k, n ) );
+
+ // U[ k, k ] = alpha = sqrt( UU[ k, k ] )
+ Scalar< D > alpha;
+ rc = rc ? rc : eWiseLambda(
+ [ &alpha, &ring ]( const size_t i, D &val ) {
+ if( i == 0 ) {
+ (void) set( alpha, alp::Scalar< D >( std::sqrt( val ) ) );
+ val = *alpha;
+ }
+ },
+ a
+ );
+
+#ifdef DEBUG
+ std::cout << "alpha " << *alpha << std::endl;
+ if( rc != SUCCESS ) {
+ std::cerr << " eWiseLambda( lambda, view ) (0) failed\n";
+ return rc;
+ }
+#endif
+
+ auto v = get_view( U, k, utils::range( k + 1, n ) );
+#ifdef DEBUG
+ print_vector( " -- v -- " , v );
+#endif
+ // UU[ k, k + 1: ] = UU[ k, k + 1: ] / alpha
+ rc = rc ? rc : foldl( v, alpha, divide );
+
+#ifdef DEBUG
+ if( rc != SUCCESS ) {
+ std::cerr << " eWiseLambda( lambda, view ) (1) failed\n";
+ return rc;
+ }
+#endif
+
+ // UU[ k+1: , k+1: ] -= v*v^T
+ auto Uprim = get_view( U, utils::range( k + 1, n ), utils::range( k + 1, n ) );
+
+ auto vvt = outer( v, ring.getMultiplicativeOperator() );
+#ifdef DEBUG
+ print_vector( " -- v -- " , v );
+ print_matrix( " vvt ", vvt );
+#endif
+
+ rc = rc ? rc : foldl( Uprim, vvt, minus );
+#ifdef DEBUG
+ if( rc != SUCCESS ) {
+ std::cerr << " eWiseLambda( lambda, view ) (2) failed\n";
+ return rc;
+ }
+#endif
+
+ }
+
+ return rc;
+ }
+
+
+ /** inplace blocked version, part below diagonal is not modified */
+ template<
+ typename MatU,
+ typename D = typename MatU::value_type,
+ typename Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ typename Minus = operators::subtract< D >,
+ std::enable_if_t<
+ is_matrix< MatU >::value &&
+ structures::is_a< typename MatU::structure, structures::Square >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value
+ > * = nullptr
+ >
+ RC cholesky_uptr_blk(
+ MatU &U,
+ const size_t &bs,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus()
+ ) {
+#ifdef DEBUG
+ std::cout << "Entered cholesky_upr in-place blocked version.\n";
+#endif
+ const Scalar< D > zero( ring.template getZero< D >() );
+
+ RC rc = SUCCESS;
+
+ const size_t n = nrows( U );
+
+ //nb: number of blocks of (max) size bz
+ if( ( bs == 0 ) || ( bs > n ) ) {
+ std::cerr << "Block size has illegal value, bs = " << bs << " .\n";
+ std::cerr << "It should be from interval < 0, " << n << "] .\n";
+ return FAILED;
+ }
+ size_t nb = n / bs;
+ if( n % bs != 0 ){
+ nb = nb + 1;
+ }
+
+
+ for( size_t i = 0; i < nb; ++i ) {
+ const size_t a = std::min( i * bs, n );
+ const size_t b = std::min( ( i + 1 ) * bs, n );
+ const size_t c = n;
+
+ const utils::range range1( a, b );
+ const utils::range range2( b, c );
+
+ auto A11 = get_view< structures::Square >( U, range1, range1 );
+
+ // for complex we should conjugate A12
+ auto A12 = get_view< structures::General >( U, range1, range2 );
+
+ rc = rc ? rc : cholesky_uptr( A11, ring );
+#ifdef DEBUG
+ if( rc != SUCCESS ) {
+ std::cout << "cholesky_uptr failed\n";
+ return rc;
+ }
+#endif
+
+ //auto A11_T = get_view< alp::view::transpose >( A11 );
+ auto A11UT = get_view< structures::UpperTriangular >( U, range1, range1 );
+
+ auto A11UT_T = get_view< alp::view::transpose >( A11UT );
+
+ rc = rc ? rc : algorithms::forwardsubstitution( A11UT_T, A12, ring );
+#ifdef DEBUG
+ if( rc != SUCCESS ) {
+ std::cout << "Forwardsubstitution failed\n";
+ return rc;
+ }
+#endif
+
+ auto A22UT = get_view< structures::Symmetric >( U, range2, range2 );
+ rc = rc ? rc : algorithms::fused_symm_mxm_foldl( A22UT, A12, ring, minus );
+#ifdef DEBUG
+ if( rc != SUCCESS ) {
+ std::cout << "algorithms::fused_symm_mxm_foldl failed\n";
+ return rc;
+ }
+#endif
+ }
+
+ return rc;
+ }
+
+ } // namespace algorithms
+} // namespace alp
diff --git a/include/alp/algorithms/conjugate_gradient.hpp b/include/alp/algorithms/conjugate_gradient.hpp
new file mode 100644
index 000000000..3515a070b
--- /dev/null
+++ b/include/alp/algorithms/conjugate_gradient.hpp
@@ -0,0 +1,295 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author Aristeidis Mastoras
+ */
+
+#ifndef _H_ALP_ALGORITHMS_CONJUGATE_GRADIENT_ALP
+#define _H_ALP_ALGORITHMS_CONJUGATE_GRADIENT_ALP
+
+#include
+
+#include
+
+namespace alp {
+
+ namespace algorithms {
+
+ /**
+ * Solves a linear system \f$ b = Ax \f$ with \f$ x \f$ unknown by the Conjugate
+ * Gradients (CG) method on general fields.
+ *
+ * Does not perform any preconditioning.
+ *
+ * @tparam descr The user descriptor
+ * @tparam IOType The input/output vector nonzero type
+ * @tparam ResidualType The type of the residual
+ * @tparam NonzeroType The matrix nonzero type
+ * @tparam InputType The right-hand side vector nonzero type
+ * @tparam Ring The semiring under which to perform CG
+ * @tparam Minus The minus operator corresponding to the inverse of the
+ * additive operator of the given \a Ring.
+ * @tparam Divide The division operator corresponding to the inverse of
+ * the multiplicative operator of the given \a Ring.
+ *
+ * Valid descriptors to this algorithm are:
+ * -# descriptors::no_casting
+ *
+ * By default, i.e., if none of \a ring, \a minus, or \a divide (nor their
+ * types) are explicitly provided by the user, the natural field on double
+ * data types will be assumed.
+ *
+ * \note An abstraction of a field that encapsulates \a Ring, \a Minus, and
+ * \a Divide may be more appropriate. This will also naturally ensure
+ * that demands on domain types are met.
+ *
+ * @param[in,out] x On input: the initial guess to the solution.
+ * On output: the last computed approximation.
+ * @param[in] A The (square) positive semi-definite system
+ * matrix.
+ * @param[in] b The known right-hand side in \f$ Ax = b \f$.
+ * @param[in] max_iterations The maximum number of CG iterations.
+ * @param[in] tol The requested relative tolerance.
+ * @param[out] residual The residual corresponding to output \a x.
+ * @param[in,out] r A temporary vector of the same size as \a x.
+ * @param[in,out] u A temporary vector of the same size as \a x.
+ * @param[in,out] temp A temporary vector of the same size as \a x.
+ * @param[in] ring The semiring under which to perform the CG.
+ * @param[in] minus The inverse of the additive operator of
+ * \a ring.
+ * @param[in] divide The inverse of the multiplicative operator
+ * of \a ring.
+ *
+ * \todo There is a sqrt(...) operator that lives outside of the current
+ * algebraic abstractions. Would be great if that could be eliminated.
+ * See internal issue #89.
+ */
+ template< Descriptor descr = descriptors::no_operation,
+ typename IOType, typename IOStructure,
+ typename ResidualType, typename ResidualStructure,
+ typename NonzeroType, typename NonzeroStructure, typename NonzeroStorage, typename NonzeroView,
+ typename InputType, typename InputStructure, typename InputStorage, typename InputView,
+ class Ring = Semiring< operators::add< IOType >, operators::mul< IOType >, identities::zero, identities::one >,
+ class Minus = operators::subtract< IOType >,
+ class Divide = operators::divide< IOType > >
+ RC conjugate_gradient( Vector< IOType, structures::General, Sparse > & x,
+ const Matrix< NonzeroType, structures::SymmetricPositiveDefinite, Sparse > & A,
+ const Vector< InputType, structures::General, Sparse > & b,
+ const size_t max_iterations,
+ Scalar< ResidualType > tol,
+ size_t & iterations,
+ Scalar< ResidualType, ResidualStructure > & residual,
+ Vector< IOType, structures::General, Sparse > & r,
+ Vector< IOType, structures::General, Sparse > & u,
+ Vector< IOType, structures::General, Sparse > & temp,
+ const Ring & ring = Ring(),
+ const Minus & minus = Minus(),
+ const Divide & divide = Divide() ) {
+
+ // static checks
+ static_assert( std::is_floating_point< ResidualType >::value,
+ "Can only use the CG algorithm with floating-point residual "
+ "types." ); // unless some different norm were used: issue #89
+ static_assert( !( descr & descriptors::no_casting ) || (
+ std::is_same< IOType, ResidualType >::value &&
+ std::is_same< IOType, NonzeroType >::value &&
+ std::is_same< IOType, InputType >::value
+ ), "One or more of the provided containers have differing element types "
+ "while the no-casting descriptor has been supplied"
+ );
+ static_assert( !( descr & descriptors::no_casting ) || (
+ std::is_same< NonzeroType, typename Ring::D1 >::value &&
+ std::is_same< IOType, typename Ring::D2 >::value &&
+ std::is_same< InputType, typename Ring::D3 >::value &&
+ std::is_same< InputType, typename Ring::D4 >::value
+ ), "no_casting descriptor was set, but semiring has incompatible domains "
+ "with the given containers."
+ );
+ static_assert( !( descr & descriptors::no_casting ) || (
+ std::is_same< InputType, typename Minus::D1 >::value &&
+ std::is_same< InputType, typename Minus::D2 >::value &&
+ std::is_same< InputType, typename Minus::D3 >::value
+ ), "no_casting descriptor was set, but given minus operator has "
+ "incompatible domains with the given containers."
+ );
+ static_assert( !( descr & descriptors::no_casting ) || (
+ std::is_same< ResidualType, typename Divide::D1 >::value &&
+ std::is_same< ResidualType, typename Divide::D2 >::value &&
+ std::is_same< ResidualType, typename Divide::D3 >::value
+ ), "no_casting descriptor was set, but given divide operator has "
+ "incompatible domains with the given tolerance type."
+ );
+ static_assert( std::is_floating_point< ResidualType >::value,
+ "Require floating-point residual type."
+ );
+
+ constexpr const Descriptor descr_dense = descr | descriptors::dense;
+ Scalar< ResidualType > zero( ring.template getZero< ResidualType >() );
+ const size_t n = ncols( A );
+
+ // dynamic checks
+ {
+ const size_t m = nrows( A );
+ if( size( x ) != n ) {
+ return MISMATCH;
+ }
+ if( size( b ) != m ) {
+ return MISMATCH;
+ }
+ if( size( r ) != n || size( u ) != n || size( temp ) != n ) {
+ std::cerr << "Error: provided workspace vectors are not of the correct "
+ << "length.\n";
+ return MISMATCH;
+ }
+ if( m != n ) {
+ std::cerr << "Warning: alp::algorithms::conjugate_gradient requires "
+ << "square input matrices, but a non-square input matrix was "
+ << "given instead.\n";
+ return ILLEGAL;
+ }
+
+ // capacities
+ if( capacity( x ) != n ) {
+ return ILLEGAL;
+ }
+ if( capacity( r ) != n || capacity( u ) != n || capacity( temp ) != n ) {
+ return ILLEGAL;
+ }
+
+ // others
+ if( tol <= zero ) {
+ std::cerr << "Error: tolerance input to CG must be strictly positive\n";
+ return ILLEGAL;
+ }
+ }
+
+ // make x and b structurally dense (if not already) so that the remainder
+ // algorithm can safely use the dense descriptor for faster operations
+ {
+ RC rc = SUCCESS;
+ if( nnz( x ) != n ) {
+ rc = set< descriptors::invert_mask | descriptors::structural >(
+ x, x, zero
+ );
+ }
+ if( rc != SUCCESS ) {
+ return rc;
+ }
+ assert( nnz( x ) == n );
+ }
+
+ Scalar< ResidualType > alpha, sigma, bnorm;
+
+ // temp = 0
+ RC ret = set( temp, zero );
+
+ // temp += A * x
+ ret = ret ? ret : mxv< descr_dense >( temp, A, x, ring );
+
+ // r = b - temp;
+ ret = ret ? ret : set( r, zero );
+ ret = ret ? ret : foldl( r, b, ring.getAdditiveMonoid() );
+ assert( nnz( r ) == n );
+ assert( nnz( temp ) == n );
+ ret = ret ? ret : foldl< descr_dense >( r, temp, minus );
+ assert( ret == SUCCESS );
+ assert( nnz( r ) == n );
+
+ // u = r;
+ ret = ret ? ret : set( u, r );
+
+ // sigma = r' * r;
+ sigma = zero;
+ ret = ret ? ret : dot< descr_dense >( sigma, r, r, ring );
+
+ // bnorm = b' * b;
+ bnorm = zero;
+ ret = ret ? ret : dot< descr_dense >( bnorm, b, b, ring );
+
+ if( ret == SUCCESS ) {
+ foldl(tol, tol, ring.getMultiplicativeMonoid() );
+ foldl(tol, bnorm, ring.getMultiplicativeMonoid() );
+ }
+
+ size_t iter = 0;
+
+ do {
+ // temp = 0
+ set( temp, zero );
+
+ // temp += A * u;
+ ret = ret ? ret : mxv< descr_dense >( temp, A, u, ring );
+
+ // residual = u' * temp
+ residual = zero;
+ ret = ret ? ret : dot< descr_dense >( residual, temp, u, ring );
+
+ // alpha = sigma / residual;
+ ret = ret ? ret : apply( alpha, sigma, residual, divide );
+
+ // x = x + alpha * u;
+ ret = ret ? ret : eWiseMul< descr_dense >( x, alpha, u, ring );
+
+ // temp = alpha .* temp
+ // Warning: operator-based foldr requires temp be dense
+ ret = ret ? ret : foldr( alpha, temp, ring.getMultiplicativeMonoid() );
+
+ // r = r - temp;
+ ret = ret ? ret : foldl< descr_dense >( r, temp, minus );
+
+ // residual = r' * r;
+ residual = zero;
+ ret = ret ? ret : dot< descr_dense >( residual, r, r, ring );
+
+ if( ret == SUCCESS ) {
+ if( residual < tol ) {
+ break;
+ }
+ }
+
+ // alpha = residual / sigma;
+ ret = ret ? ret : apply( alpha, residual, sigma, divide );
+
+ // temp = r + alpha * u;
+ ret = ret ? ret : set( temp, r );
+ ret = ret ? ret : eWiseMul< descr_dense >( temp, alpha, u, ring );
+ assert( nnz( temp ) == size( temp ) );
+
+ // u = temp
+ std::swap( u, temp );
+
+ // sigma = residual;
+ sigma = residual;
+
+ } while( iter++ < max_iterations && ret == SUCCESS );
+
+ // output
+ iterations = iter;
+
+ if( ret != SUCCESS ) {
+ return FAILED;
+ } else {
+ return SUCCESS;
+ }
+ }
+
+ } // namespace algorithms
+
+} // end namespace alp
+
+#endif // end _H_ALP_ALGORITHMS_CONJUGATE_GRADIENT_ALP
diff --git a/include/alp/algorithms/forwardsubstitution.hpp b/include/alp/algorithms/forwardsubstitution.hpp
new file mode 100644
index 000000000..05c0dd9bc
--- /dev/null
+++ b/include/alp/algorithms/forwardsubstitution.hpp
@@ -0,0 +1,301 @@
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+
+#include
+#ifdef DEBUG
+#include "../../../tests/utils/print_alp_containers.hpp"
+#endif
+
+namespace alp {
+
+ namespace algorithms {
+
+ /**
+ * Solves linear system Ax=b
+ * where A is LowerTriangular matrix, b is given RHS vector
+ * and x is the solution.
+ *
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @tparam Minus Type minus operator used in the computation
+ * @tparam Divide Type of divide operator used in the computation
+		 * @param[in] A input lower triangular matrix
+ * @param[in] b input RHS vector
+ * @param[out] x solution vector
+ * @param[in] ring The semiring used in the computation
+ * @return RC SUCCESS if the execution was correct
+ *
+ */
+ template<
+ typename MatA,
+ typename Vecx,
+ typename Vecb,
+ typename D = typename MatA::value_type,
+ typename Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ typename Minus = operators::subtract< D >,
+ typename Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_vector< Vecx >::value &&
+ is_vector< Vecb >::value &&
+ is_matrix< MatA >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value &&
+ structures::is_a< typename MatA::structure, structures::LowerTriangular >::value
+ > * = nullptr
+ >
+ RC forwardsubstitution(
+ MatA &A,
+ Vecx &x,
+ Vecb &b,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+			const Divide &divide = Divide()
+ ) {
+
+ RC rc = SUCCESS;
+
+ if( ( ncols( A ) != size( x ) ) || ( size( b ) != size( x ) ) ) {
+ std::cerr << "Incompatible sizes in trsv.\n";
+ return FAILED;
+ }
+
+ const size_t n = nrows( A );
+
+ for( size_t i = 0; i < n ; ++i ) {
+ Scalar< D > alpha( ring.template getZero< D >() );
+ auto A_i = get_view( A, i, utils::range( 0, i ) );
+ auto A_ii = get_view( A, i, utils::range( i, i + 1 ) );
+ auto x_i = get_view( x, utils::range( i, i + 1 ) );
+ auto b_i = get_view( b, utils::range( i, i + 1 ) );
+ auto x_0_i = get_view( x, utils::range( 0, i ) );
+ rc = rc ? rc : alp::dot( alpha, A_i, alp::conjugate( x_0_i ), ring );
+ rc = rc ? rc : alp::set( x_i, b_i );
+ rc = rc ? rc : alp::foldl( x_i, alpha, minus );
+ rc = rc ? rc : alp::set( alpha, Scalar< D >( ring.template getZero< D >() ) );
+ rc = rc ? rc : alp::foldl( alpha, A_ii, ring.getAdditiveMonoid() );
+ rc = rc ? rc : alp::foldl( x_i, alpha, divide );
+ }
+
+ return rc;
+ }
+
+ /** matrix version */
+ template<
+ typename MatA,
+ typename MatX,
+ typename MatB,
+ typename D = typename MatA::value_type,
+ typename Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ typename Minus = operators::subtract< D >,
+ typename Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_matrix< MatA >::value &&
+ is_matrix< MatX >::value &&
+ is_matrix< MatB >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value &&
+ structures::is_a< typename MatA::structure, structures::LowerTriangular >::value &&
+ structures::is_a< typename MatX::structure, typename MatB::structure >::value
+ > * = nullptr
+ >
+ RC forwardsubstitution(
+ MatA &A,
+ MatX &X,
+ MatB &B,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+			const Divide &divide = Divide()
+ ) {
+
+ RC rc = SUCCESS;
+
+ if (
+ ( nrows( X ) != nrows( B ) ) ||
+ ( ncols( X ) != ncols( B ) ) ||
+ ( ncols( A ) != nrows( X ) )
+ ) {
+ std::cerr << "Incompatible sizes in trsm.\n";
+ return FAILED;
+ }
+
+ const size_t m = nrows( X );
+ const size_t n = ncols( X );
+
+ for( size_t i = 0; i < n ; ++i ) {
+ auto x = get_view( X, utils::range( 0, m ), i );
+ auto b = get_view( B, utils::range( 0, m ), i );
+ rc = rc ? rc : algorithms::forwardsubstitution( A, x, b, ring, minus, divide );
+ }
+
+ return rc;
+ }
+
+ /** inplace versions */
+ template<
+ typename MatA,
+ typename Vecx,
+ typename D = typename MatA::value_type,
+ typename Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ typename Minus = operators::subtract< D >,
+ typename Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_vector< Vecx >::value &&
+ is_matrix< MatA >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value &&
+ structures::is_a< typename MatA::structure, structures::LowerTriangular >::value
+ > * = nullptr
+ >
+ RC forwardsubstitution(
+ MatA &A,
+ Vecx &x,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+			const Divide &divide = Divide()
+ ) {
+
+ RC rc = SUCCESS;
+
+ if( nrows( A ) != size( x ) ) {
+ std::cerr << "Incompatible sizes in trsv.\n";
+ return FAILED;
+ }
+
+ const size_t n = nrows( A );
+
+ for( size_t i = 0; i < n ; ++i ) {
+ Scalar< D > alpha( ring.template getZero< D >() );
+ auto A_i = get_view( A, i, utils::range( 0, i ) );
+ auto A_ii = get_view( A, i, utils::range( i, i + 1 ) );
+ auto x_i = get_view( x, utils::range( i, i + 1 ) );
+ auto x_0_i = get_view( x, utils::range( 0, i ) );
+ rc = rc ? rc : alp::dot( alpha, A_i, alp::conjugate( x_0_i ), ring );
+ rc = rc ? rc : alp::foldl( x_i, alpha, minus );
+ rc = rc ? rc : alp::set( alpha, Scalar< D >( ring.template getZero< D >() ) );
+ rc = rc ? rc : alp::foldl( alpha, A_ii, ring.getAdditiveMonoid() );
+ rc = rc ? rc : alp::foldl( x_i, alpha, divide );
+ }
+
+ return rc;
+ }
+
+ /** inplace matrix version */
+ template<
+ typename MatA,
+ typename MatX,
+ typename D = typename MatA::value_type,
+ typename Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ typename Minus = operators::subtract< D >,
+ typename Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_matrix< MatA >::value &&
+ is_matrix< MatX >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value &&
+ structures::is_a< typename MatA::structure, structures::LowerTriangular >::value &&
+ config::default_backend != Backend::dispatch
+ > * = nullptr
+ >
+ RC forwardsubstitution(
+ MatA &A,
+ MatX &X,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+			const Divide &divide = Divide()
+ ) {
+
+ RC rc = SUCCESS;
+
+ if ( ncols( A ) != nrows( X ) ) {
+ std::cerr << "Incompatible sizes in trsm.\n";
+ return FAILED;
+ }
+
+ const size_t m = nrows( X );
+ const size_t n = ncols( X );
+
+ for( size_t i = 0; i < n ; ++i ) {
+ auto x = get_view( X, utils::range( 0, m ), i );
+ rc = rc ? rc : algorithms::forwardsubstitution( A, x, ring, minus, divide );
+ }
+
+ return rc;
+ }
+
+ /** inplace matrix version */
+ template<
+ typename MatA,
+ typename MatX,
+ typename D = typename MatA::value_type,
+ typename Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ typename Minus = operators::subtract< D >,
+ typename Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_matrix< MatA >::value &&
+ is_matrix< MatX >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value &&
+ structures::is_a< typename MatA::structure, structures::LowerTriangular >::value &&
+ config::default_backend == Backend::dispatch
+ > * = nullptr
+ >
+ RC forwardsubstitution(
+ MatA &A,
+ MatX &X,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+			const Divide &divide = Divide()
+ ) {
+#ifdef DEBUG
+ std::cout << "Entered TRSM in-place matrix version (offloads to blas_trsm).\n";
+#endif
+ (void) ring;
+ (void) minus;
+ (void) divide;
+ RC rc = SUCCESS;
+
+ if ( ncols( A ) != nrows( X ) ) {
+ std::cerr << "Incompatible sizes in trsm.\n";
+ return FAILED;
+ }
+
+ const size_t m = nrows( X );
+ const size_t n = ncols( X );
+
+#ifdef _ALP_WITH_DISPATCH
+ cblas_dtrsm(
+ CblasRowMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit,
+ m, n,
+ 1,
+ internal::getRawPointerToFirstElement( A ), internal::getLeadingDimension( A ),
+ internal::getRawPointerToFirstElement( X ), internal::getLeadingDimension( X )
+ );
+#endif
+
+ return rc;
+ }
+
+ } // namespace algorithms
+} // namespace alp
diff --git a/include/alp/algorithms/fused_mxm_foldl.hpp b/include/alp/algorithms/fused_mxm_foldl.hpp
new file mode 100644
index 000000000..b4581862d
--- /dev/null
+++ b/include/alp/algorithms/fused_mxm_foldl.hpp
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include
+
+namespace alp {
+
+ namespace algorithms {
+
+ /**
+ * Performs mxm followed by foldl: C += A*AT.
+ * The purpose of this function is to simulate operation fusion.
+ *
+ * @tparam MatrixC Type of a symmetric ALP Matrix C
+ * @tparam MatrixA Type of ALP Matrix A
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring passed to mxm
+ * @tparam Op Type of the operator passed to foldl
+ *
+ * @param[inout] C Matrix C
+ * @param[in] A Matrix A
+ * @param[in] ring Ring passed to mxm
+ * @param[in] op Operator passed to foldl
+ *
+ * @return RC SUCCESS if the execution was correct
+ * \note This does not support complex numbers at the moment.
+ */
+ template<
+ typename MatrixC,
+ typename MatrixA,
+ typename D = typename MatrixC::value_type,
+ typename Ring, typename Op,
+ std::enable_if_t<
+ alp::is_matrix< MatrixC >::value &&
+ alp::is_matrix< MatrixA >::value &&
+ alp::is_semiring< Ring >::value &&
+ alp::is_operator< Op >::value &&
+ config::default_backend != Backend::dispatch
+ > * = nullptr
+ >
+ RC fused_symm_mxm_foldl(
+ MatrixC &C,
+ MatrixA &A,
+ const Ring &ring = Ring(),
+ const Op &op = Op()
+ ) {
+
+ // Verify that the C is of dimensions nrows(A) x nrows(A)
+ const size_t m = ncols( A );
+ if( ( nrows( C ) != m ) || ( ncols( C ) != m ) ) {
+ return MISMATCH;
+ }
+
+ const auto AT = get_view< view::transpose >( A );
+
+ Matrix< D, typename MatrixC::structure, Density::Dense > AAT( m );
+
+ RC rc = SUCCESS;
+
+ // AAT = 0
+ rc = rc ? rc : set( AAT, Scalar< D >( ring.template getZero< D >() ) );
+ assert( rc == SUCCESS );
+
+ // AAT += A * AT
+ rc = rc ? rc : mxm( AAT, AT, A, ring );
+ assert( rc == SUCCESS );
+
+ // C += AAT
+ rc = rc ? rc : foldl( C, AAT, op );
+ assert( rc == SUCCESS );
+
+ return rc;
+ }
+
+ /**
+ * Specialization for dispatch backend. Offloads to syrk.
+ * Assumes that A is transposed.
+ */
+ template<
+ typename MatrixC,
+ typename MatrixA,
+ typename D = typename MatrixC::value_type,
+ typename Ring, typename Op,
+ std::enable_if_t<
+ alp::is_matrix< MatrixC >::value &&
+ alp::is_matrix< MatrixA >::value &&
+ alp::is_semiring< Ring >::value &&
+ alp::is_operator< Op >::value &&
+ config::default_backend == Backend::dispatch
+ > * = nullptr
+ >
+ RC fused_symm_mxm_foldl(
+ MatrixC &C,
+ MatrixA &A,
+ const Ring &ring = Ring(),
+ const Op &op = Op()
+ ) {
+ (void) ring;
+ (void) op;
+
+ // Verify that the C is of dimensions nrows(A) x nrows(A)
+ const size_t k = nrows( A );
+ const size_t m = ncols( A );
+ if( ( nrows( C ) != m ) || ( ncols( C ) != m ) ) {
+ return MISMATCH;
+ }
+
+ RC rc = SUCCESS;
+
+#ifdef _ALP_WITH_DISPATCH
+ cblas_dsyrk(
+ CblasRowMajor, CblasUpper, CblasTrans,
+ m,
+ k,
+ -1,
+ internal::getRawPointerToFirstElement( A ),
+ internal::getLeadingDimension( A ),
+ 1,
+ internal::getRawPointerToFirstElement( C ),
+ internal::getLeadingDimension( C )
+ );
+#endif
+
+ return rc;
+ }
+
+ } // namespace algorithms
+} // namespace alp
diff --git a/include/alp/algorithms/gemm.hpp b/include/alp/algorithms/gemm.hpp
new file mode 100644
index 000000000..ed5f7f5a8
--- /dev/null
+++ b/include/alp/algorithms/gemm.hpp
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+#include
+
+namespace alp {
+
+ namespace algorithms {
+
+ /**
+ * @brief gemm_like example where a sub-matrix
+ * \f$C_blk = \alpha \cdot At_blk \cdot B_blk + \beta \cdot C_blk\f$,
+ * where \f$At_blk, B_blk, C_blk\f$ are sub-matrices (optionally at
+ * a stride both row- and column-wise) of matrices
+ * \f$A, B, C\f$, respectively, and \f$At_blk\f$ and \f$B_blk\f$ may be
+ * transposed views over the \f$A\f$ and \f$B\f$ sub-matrices
+ * depending on parameters \f$transposeA\f$ and \f$transposeB\f$, respectively.
+ *
+ * @tparam transposeA Whether to transpose A
+ * @tparam transposeB Whether to transpose B
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in computation
+ * @param m Number of rows of matrices \a C_blk and \a At_blk
+ * @param n Number of columns of matrices \a C_blk and \a B_blk
+ * @param k Number of rows of matrix \a B_blk and columns of \a A_blk
+ * @param alpha Alpha scalar parameter
+ * @param A reference to matrix A
+ * @param startAr Row offset of \a At_blk within \a A
+ * @param startAc Column offset of \a At_blk within \a A
+ * @param B reference to matrix B
+ * @param startBr Row offset of \a B_blk within \a B
+ * @param startBc Column offset of \a B_blk within \a B
+ * @param beta Beta scalar parameter
+ * @param C reference to matrix C
+ * @param startCr Row offset of \a C_blk within \a C
+ * @param startCc Column offset of \a C_blk within \a C
+ * @param ring The semiring used for performing operations
+ * @return RC SUCCESS if the execution was correct
+ */
+ template<
+ bool transposeA = false,
+ bool transposeB = false,
+ typename D = double,
+ typename Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >
+ >
+ RC gemm_like_example(
+ const size_t m,
+ const size_t n,
+ const size_t k,
+ const Scalar< D > &alpha,
+ Matrix< D, structures::General, Dense > &A,
+ const size_t startAr,
+ const size_t strideAr,
+ const size_t startAc,
+ const size_t strideAc,
+ Matrix< D, structures::General, Dense > &B,
+ const size_t startBr,
+ const size_t strideBr,
+ const size_t startBc,
+ const size_t strideBc,
+ const Scalar< D > &beta,
+ Matrix< D, structures::General, Dense > &C,
+ const size_t startCr,
+ const size_t strideCr,
+ const size_t startCc,
+ const size_t strideCc,
+ const Ring &ring = Ring()
+ ) {
+
+ // Ensure the compatibility of parameters
+ const size_t endCr = startCr + m * strideCr;
+ const size_t endCc = startCc + n * strideCc;
+ const size_t endAr = transposeA ? startAr + k * strideAr : startAr + m * strideAr;
+ const size_t endAc = transposeA ? startAc + m * strideAc : startAc + k * strideAc;
+ const size_t endBr = transposeB ? startBr + n * strideBr : startBr + k * strideBr;
+ const size_t endBc = transposeB ? startBc + k * strideBc : startBc + n * strideBc;
+
+ if(
+ ( endAr > nrows( A ) ) || ( endAc > ncols( A ) ) ||
+ ( endBr > nrows( B ) ) || ( endBc > ncols( B ) ) ||
+ ( endCr > nrows( C ) ) || ( endCc > ncols( C ) )
+ ) {
+ return MISMATCH;
+ }
+
+ const size_t mA = transposeA ? k : m;
+ const size_t kA = transposeA ? m : k;
+ auto A_blk_orig = get_view(
+ A,
+ utils::range( startAr, startAr + mA * strideAr, strideAr ),
+ utils::range( startAc, startAc + kA * strideAc, strideAc )
+ );
+
+ auto A_blk = get_view< transposeA ? view::transpose : view::original >( A_blk_orig );
+
+ const size_t kB = transposeB ? n : k;
+ const size_t nB = transposeB ? k : n;
+ auto B_blk_orig = get_view(
+ B,
+ utils::range( startBr, startBr + kB * strideBr, strideBr ),
+ utils::range( startBc, startBc + nB * strideBc, strideBc )
+ );
+
+ auto B_blk = get_view< transposeB ? view::transpose : view::original >( B_blk_orig );
+
+ auto C_blk = get_view(
+ C,
+ utils::range( startCr, startCr + m * strideCr, strideCr ),
+ utils::range( startCc, startCc + n * strideCc, strideCc )
+ );
+
+ Matrix< D, structures::General, Dense > C_tmp( m, n );
+
+ RC rc = SUCCESS;
+
+ // C_blk = beta * C_blk
+ rc = rc ? rc : foldr( beta, C_blk, ring.getMultiplicativeMonoid() );
+ assert( rc == SUCCESS );
+
+ // C_tmp = 0
+ rc = rc ? rc : set( C_tmp, Scalar< D >( ring.template getZero< D >() ) );
+ assert( rc == SUCCESS );
+ // C_tmp += At_blk * B_blk
+ rc = rc ? rc : mxm( C_tmp, A_blk, B_blk, ring );
+ assert( rc == SUCCESS );
+
+ // C_blk += alpha * C_tmp
+ rc = rc ? rc : eWiseMul( C_blk, alpha, C_tmp, ring );
+ assert( rc == SUCCESS );
+
+ return rc;
+ }
+
+ } // namespace algorithms
+} // namespace alp
diff --git a/include/alp/algorithms/householder_bidiag.hpp b/include/alp/algorithms/householder_bidiag.hpp
new file mode 100644
index 000000000..25b44cd73
--- /dev/null
+++ b/include/alp/algorithms/householder_bidiag.hpp
@@ -0,0 +1,204 @@
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+#include
+#include // use from grb
+#ifdef DEBUG
+#include "../tests/utils/print_alp_containers.hpp"
+#endif
+
+namespace alp {
+
+ namespace algorithms {
+
+ /**
+ * Given a general matrix H perform inplace Householder reflections in order to
+ * eliminate column elements H[i+d:,i] (below diagonal or subdiagonal), H = U H.
+ *
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @tparam Minus Type minus operator used in the computation
+ * @tparam Divide Type of divide operator used in the computation
+ * @param[in,out] U updated orthogonal matrix
+ * @param[in,out] H updated general matrix with column
+ * elements H[i+d:,i] eliminated
+ * @param[in] i column to eliminate
+ * @param[in] d offset from diagonal to eliminate, default 0
+ * @param[in] ring A semiring for operations
+ * @return RC SUCCESS if the execution was correct
+ */
+ template<
+ typename MatH,
+ typename MatU,
+ typename IndexType,
+ typename D = typename MatH::value_type,
+ class Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ class Minus = operators::subtract< D >,
+ class Divide = operators::divide< D >,
+ std::enable_if_t<
+ structures::is_a< typename MatH::structure, structures::General >::value &&
+ structures::is_a< typename MatU::structure, structures::Orthogonal >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value
+ > * = nullptr
+ >
+ RC elminate_below_ith_diag(
+ const IndexType i,
+ MatH &H,
+ MatU &U,
+ const IndexType d = 0,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+ const Divide &divide = Divide()
+ ) {
+ RC rc = SUCCESS;
+
+ const Scalar< D > zero( ring.template getZero< D >() );
+ const Scalar< D > one( ring.template getOne< D >() );
+
+ const IndexType m = nrows( H );
+ const IndexType n = ncols( H );
+
+ // v=copy(A0[i+d:,i])
+ auto a = get_view( H, utils::range( i + d, m ), i );
+ Vector< D > v( m - ( i + d ) );
+ rc = rc ? rc : alp::set( v, a );
+
+ // alpha=v[0]/abs(v[0])
+ Scalar< D > alpha( zero );
+ auto v0 = get_view( v, utils::range( 0, 1 ) );
+ rc = rc ? rc : foldl( alpha, v0, ring.getAdditiveMonoid() );
+ rc = rc ? rc : foldl( alpha, Scalar< D >( std::abs( *alpha ) ), divide );
+
+ // alpha=alpha*norm(v)
+ Scalar< D > norm_v1( zero );
+ rc = rc ? rc : norm2( norm_v1, v, ring );
+ rc = rc ? rc : foldl( alpha, norm_v1, ring.getMultiplicativeOperator() );
+
+ // v[0]=v[0]-alpha
+ rc = rc ? rc : foldl( v0, alpha, minus );
+
+ // v=v/norm(v)
+ Scalar< D > norm_v2( zero );
+ rc = rc ? rc : norm2( norm_v2, v, ring );
+ rc = rc ? rc : foldl( v, norm_v2, divide );
+
+ //P1=zeros((m-(i+d),m-(i+d))).astype(complex)
+ //P1=P1-2*outer(v,conjugate(v))
+ auto vvh = outer( v, ring.getMultiplicativeOperator() );
+ Matrix< D, typename decltype( vvh )::structure, Dense > reflector( m - ( i + d ) );
+ rc = rc ? rc : alp::set( reflector, vvh );
+ rc = rc ? rc : foldl( reflector, Scalar< D > ( -2 ), ring.getMultiplicativeOperator() );
+
+ // A0=P.dot(A0)
+ auto Hupdate = get_view( H, utils::range( i + d, m ), utils::range( 0, n ) );
+ Matrix< D, structures::General, Dense > Temp1( m - ( i + d ) , n );
+ rc = rc ? rc : alp::set( Temp1, Hupdate );
+ rc = rc ? rc : mxm( Hupdate, reflector, Temp1, ring );
+
+ // Uk=Uk.dot(P)
+ auto Uupdate = get_view< structures::OrthogonalColumns >( U, utils::range( 0, m ), utils::range( i + d, m ) );
+ Matrix< D, structures::OrthogonalColumns, Dense > Temp2( m, m - ( i + d ) );
+ rc = rc ? rc : alp::set( Temp2, Uupdate );
+ rc = rc ? rc : mxm( Uupdate, Temp2, reflector, ring );
+
+ return rc;
+ }
+
+ /**
+ * Computes Householder (inplace) bidiagonalisation of general matrix \f$H = U B V \f$
+ * where \a H is general (complex or real),
+ * \a U orthogonal, \a B is bidiagonal and \a V orthogonal.
+ *
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @tparam Minus Type minus operator used in the computation
+ * @tparam Divide Type of divide operator used in the computation
+ * @param[in,out] U updated orthogonal matrix
+ * @param[in,out] V updated orthogonal matrix
+ * @param[in,out] H input general matrix, output bidiagonal matrix (B)
+ * @param[in] ring A semiring for operations
+ * @return RC SUCCESS if the execution was correct
+ *
+ */
+ template<
+ typename MatH,
+ typename D = typename MatH::value_type,
+ typename MatU,
+ typename MatV,
+ class Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ class Minus = operators::subtract< D >,
+ class Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_matrix< MatH >::value &&
+ is_matrix< MatU >::value &&
+ is_matrix< MatV >::value &&
+ structures::is_a< typename MatH::structure, structures::General >::value &&
+ structures::is_a< typename MatU::structure, structures::Orthogonal >::value &&
+ structures::is_a< typename MatV::structure, structures::Orthogonal >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value
+ > * = nullptr
+ >
+ RC householder_bidiag(
+ MatU &U,
+ MatH &H,
+ MatV &V,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+ const Divide &divide = Divide()
+ ) {
+ RC rc = SUCCESS;
+
+ const Scalar< D > zero( ring.template getZero< D >() );
+ const Scalar< D > one( ring.template getOne< D >() );
+
+ const size_t m = nrows( H );
+ const size_t n = ncols( H );
+
+ // check sizes
+ if(
+ ( ncols( U ) != nrows( H ) ) ||
+ ( ncols( H ) != nrows( V ) )
+ ) {
+ std::cerr << "Incompatible sizes in householder_bidiag.\n";
+ return FAILED;
+ }
+
+ //for i in range(min(n,m)):
+ for( size_t i = 0; i < std::min( n, m ); ++i ) {
+ // eliminate column elements below ith diagonal element
+ if( i < std::min( n, m - 1 ) ) {
+ rc = rc ? rc : elminate_below_ith_diag( i, H, U, static_cast< size_t >( 0 ), ring, minus, divide );
+ }
+ // eliminate row elements to the right from (i+1)th diagonal element
+ if( i < std::min( n - 2, m ) ) {
+ auto HT = get_view< alp::view::transpose >( H );
+ auto VT = get_view< alp::view::transpose >( V );
+ rc = rc ? rc : elminate_below_ith_diag( i, HT, VT, static_cast< size_t >( 1 ), ring, minus, divide );
+ }
+ }
+
+ return rc;
+
+ }
+ } // namespace algorithms
+} // namespace alp
diff --git a/include/alp/algorithms/householder_lu.hpp b/include/alp/algorithms/householder_lu.hpp
new file mode 100644
index 000000000..a4c8b7034
--- /dev/null
+++ b/include/alp/algorithms/householder_lu.hpp
@@ -0,0 +1,195 @@
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <numeric> //iota
+#include
+#include
+
+#include
+#include // use from grb
+#ifdef DEBUG
+#include "../tests/utils/print_alp_containers.hpp"
+#endif
+
+namespace alp {
+
+ namespace algorithms {
+
+ /**
+ * Computes Householder LU decomposition of general matrix \f$H = LU\f$
+ * where \a H is general (complex or real),
+ * \a L lower trapezoidal,
+ * \a U is upper trapezoidal.
+ *
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @tparam Minus Type minus operator used in the computation
+ * @tparam Divide Type of divide operator used in the computation
+ * @param[out] L output lower trapezoidal matrix
+ * @param[out] U output upper trapezoidal matrix
+ * @param[out] p output permutation vector
+ * @param[in] H input general matrix
+ * @param[in] ring A semiring for operations
+ * @return RC SUCCESS if the execution was correct
+ *
+ */
+ template<
+ typename MatH,
+ typename D = typename MatH::value_type,
+ typename MatL,
+ typename MatU,
+ typename IndexType,
+ class Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ class Minus = operators::subtract< D >,
+ class Divide = operators::divide< D >,
+ std::enable_if_t<
+ std::is_integral< IndexType >::value &&
+ is_matrix< MatH >::value &&
+ is_matrix< MatL >::value &&
+ is_matrix< MatU >::value &&
+ structures::is_a< typename MatH::structure, structures::General >::value &&
+ structures::is_a< typename MatL::structure, structures::LowerTrapezoidal >::value &&
+ structures::is_a< typename MatU::structure, structures::UpperTrapezoidal >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value
+ > * = nullptr
+ >
+ RC householder_lu(
+ const MatH &H,
+ MatL &L,
+ MatU &U,
+ Vector< IndexType > &p,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+ const Divide &divide = Divide()
+ ) {
+ RC rc = SUCCESS;
+
+ const Scalar< D > zero( ring.template getZero< D >() );
+ const Scalar< D > one( ring.template getOne< D >() );
+
+ const size_t m = nrows( H );
+ const size_t n = ncols( H );
+ const size_t kk = std::min( n, m );
+
+ // initialize permutation vector to identity permutation
+ alp::set< alp::descriptors::use_index >( p, alp::Scalar< IndexType >( 0 ) );
+
+ // check sizes
+ if(
+ ( nrows( L ) != nrows( H ) ) ||
+ ( ncols( U ) != ncols( H ) ) ||
+ ( nrows( U ) != kk ) ||
+ ( ncols( L ) != kk )
+ ) {
+#ifdef DEBUG
+ std::cerr << " n, kk, m = " << n << ", " << kk << ", " << m << "\n";
+ std::cerr << "Incompatible sizes in householder_lu.\n";
+#endif
+ return FAILED;
+ }
+
+
+ // L = identity( n )
+ auto Ldiag = alp::get_view< alp::view::diagonal >( L );
+ rc = rc ? rc : alp::set( Ldiag, one );
+ if( rc != SUCCESS ) {
+ std::cerr << " alp::set( L, I ) failed\n";
+ return rc;
+ }
+
+ // Out of place specification of the computation
+ MatH HWork( m, n );
+ rc = rc ? rc : alp::set( HWork, H );
+ if( rc != SUCCESS ) {
+ std::cerr << " alp::set( HWork, H ) failed\n";
+ return rc;
+ }
+
+ Vector< D > PivotVec( n );
+ rc = rc ? rc : alp::set( PivotVec, zero );
+
+ for( size_t k = 0; k < std::min( n, m ); ++k ) {
+ // ===== algorithm =====
+ // a = H[ k, k ]
+ // v = H[ k + 1 : , k ]
+ // w = H[ k, k + 1 : ]
+ // Ak = H[ k + 1 :, k + 1 : ]
+ // v = v / a
+ // Ak = Ak - outer(v,w)
+ // scalar view should replace vector view of length 1 (issue #598)
+ // besides here there are many places in the use cases where this should be changed
+ auto a_view = alp::get_view( HWork, utils::range( k, k + 1 ), k );
+ auto v_view = alp::get_view( HWork, utils::range( k + 1, m ), k );
+ auto w_view = alp::get_view( HWork, k, utils::range( k + 1, n ) );
+ auto Ak_view = alp::get_view( HWork, utils::range( k + 1, m ), utils::range( k + 1, n ) );
+
+ Scalar< D > alpha( zero );
+ rc = rc ? rc : alp::foldl( alpha, a_view, ring.getAdditiveMonoid() );
+
+ // pivoting: find index ipivot
+ size_t ipivot = k;
+ rc = rc ? rc : eWiseLambda(
+ [ &alpha, &ipivot, &k ]( const size_t i, D &val ) {
+ if( std::abs( val ) > std::abs( *alpha ) ) {
+ *alpha = val;
+ ipivot = i + k + 1;
+ }
+ },
+ v_view
+ );
+ // do pivoting if needed
+ if( ipivot > k ) {
+ //p[ ipivot ] <-> p[ k ]
+ auto p1 = alp::get_view( p, utils::range( k, k + 1 ) );
+ auto p2 = alp::get_view( p, utils::range( ipivot, ipivot + 1 ) );
+ Vector< size_t > ptmp( 1 );
+ rc = rc ? rc : alp::set( ptmp, p1 );
+ rc = rc ? rc : alp::set( p1, p2 );
+ rc = rc ? rc : alp::set( p2, ptmp );
+
+ //HWork[ ipivot ] <-> HWork[ k ]
+ auto v1 = alp::get_view( HWork, k, utils::range( 0, n ) );
+ auto v2 = alp::get_view( HWork, ipivot, utils::range( 0, n ) );
+ rc = rc ? rc : alp::set( PivotVec, v1 );
+ rc = rc ? rc : alp::set( v1, v2 );
+ rc = rc ? rc : alp::set( v2, PivotVec );
+ }
+
+ rc = rc ? rc : alp::foldl( v_view, alpha, divide );
+
+ auto w_view_star = conjugate( w_view );
+ auto Reflector = alp::outer( v_view, w_view_star, ring.getMultiplicativeOperator() );
+
+ rc = rc ? rc : alp::foldl( Ak_view, Reflector, minus );
+
+ }
+
+
+ // save the result in L and U
+ auto H_Utrapez = get_view< structures::UpperTrapezoidal >( HWork, utils::range( 0, kk ), utils::range( 0, n ) );
+ rc = rc ? rc : alp::set( U, H_Utrapez );
+
+ auto H_Ltrapez = get_view< structures::LowerTrapezoidal >( HWork, utils::range( 1, m ), utils::range( 0, kk ) );
+ auto L_lowerTrapez = get_view( L, utils::range( 1, m ), utils::range( 0, kk ) );
+ rc = rc ? rc : alp::set( L_lowerTrapez, H_Ltrapez );
+
+ return rc;
+
+ }
+ } // namespace algorithms
+} // namespace alp
diff --git a/include/alp/algorithms/householder_qr.hpp b/include/alp/algorithms/householder_qr.hpp
new file mode 100644
index 000000000..b62c87f7e
--- /dev/null
+++ b/include/alp/algorithms/householder_qr.hpp
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+#include
+#include // use from grb
+#ifdef DEBUG
+#include "../tests/utils/print_alp_containers.hpp"
+#endif
+
+namespace alp {
+
+ namespace algorithms {
+
+ /**
+ * @brief Computes Householder QR decomposition of general matrix \f$H = QR\f$
+ * where \a H is general (complex or real),
+ * \a R is upper triangular (if H is not square,
+ * R is of the same shape with zeros below diagonal), and
+ * \a Q is orthogonal.
+ *
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @tparam Minus Type minus operator used in the computation
+ * @tparam Divide Type of divide operator used in the computation
+ * @param[out] Q output orthogonal matrix such that H = Q R
+ * @param[out] R output same shape as H with zeros below diagonal
+ * @param[in] H input general matrix
+ * @param[in] ring A semiring for operations
+ * @return RC SUCCESS if the execution was correct
+ *
+ */
+ template<
+ typename D,
+ typename GeneralType,
+ typename GenView,
+ typename GenImfR,
+ typename GenImfC,
+ typename OrthogonalType,
+ typename OrthogonalView,
+ typename OrthogonalImfR,
+ typename OrthogonalImfC,
+ class Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ class Minus = operators::subtract< D >,
+ class Divide = operators::divide< D >
+ >
+ RC householder_qr(
+ Matrix< D, GeneralType, alp::Dense, GenView, GenImfR, GenImfC > &H,
+ Matrix< D, OrthogonalType, alp::Dense, OrthogonalView, OrthogonalImfR, OrthogonalImfC > &Q,
+ Matrix< D, GeneralType, alp::Dense, GenView, GenImfR, GenImfC > &R,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+ const Divide &divide = Divide()
+ ) {
+ RC rc = SUCCESS;
+
+ const Scalar< D > zero( ring.template getZero< D >() );
+ const Scalar< D > one( ring.template getOne< D >() );
+ const size_t n = nrows( H );
+ const size_t m = ncols( H );
+
+#ifdef DEBUG
+ std::cout << " n, m= " << n << ", " << m << "\n";
+#endif
+
+ // Q = identity( n )
+ rc = alp::set( Q, zero );
+ auto Qdiag = alp::get_view< alp::view::diagonal >( Q );
+ rc = rc ? rc : alp::set( Qdiag, one );
+ if( rc != SUCCESS ) {
+ std::cerr << " alp::set( Q, I ) failed\n";
+ return rc;
+ }
+
+ // Out of place specification of the computation
+ Matrix< D, GeneralType, alp::Dense > RR( n, m );
+
+ rc = alp::set( RR, H );
+ if( rc != SUCCESS ) {
+ std::cerr << " alp::set( RR, H ) failed\n";
+ return rc;
+ }
+#ifdef DEBUG
+ print_matrix( " << RR >> ", RR );
+#endif
+
+ // a temporary for storing the alp::mxm result
+ Matrix< D, OrthogonalType, alp::Dense > Qtmp( n, n );
+
+ for( size_t k = 0; k < std::min( n-1, m ); ++k ) {
+#ifdef DEBUG
+ std::string matname( " << RR(" );
+ matname = matname + std::to_string( k );
+ matname = matname + std::string( ") >> " );
+ print_matrix( matname, RR );
+#endif
+
+ //const size_t m = n - k - 1;
+
+ // ===== Begin Computing v =====
+ // v = H[ k + 1 : , k ]
+ // alpha = norm( v ) * v[ 0 ] / norm( v[ 0 ] )
+ // v = v - alpha * e1
+ // v = v / norm ( v )
+
+ auto v_view = alp::get_view( RR, utils::range( k, n ), k );
+ Vector< D, GeneralType, alp::Dense > v( n - k );
+ rc = alp::set( v, v_view );
+ if( rc != SUCCESS ) {
+ std::cerr << " alp::set( v, view ) failed\n";
+ return rc;
+ }
+
+ Scalar< D > alpha( zero );
+ rc = alp::norm2( alpha, v, ring );
+ if( rc != SUCCESS ) {
+ std::cerr << " alp::norm2( alpha, v, ring ) failed\n";
+ return rc;
+ }
+
+ rc = alp::eWiseLambda(
+ [ &alpha, &ring, ÷, &minus ]( const size_t i, D &val ) {
+ if ( i == 0 ) {
+ Scalar< D > norm_v0( std::abs( val ) );
+ Scalar< D > val_scalar( val );
+ alp::foldl( alpha, val_scalar, ring.getMultiplicativeOperator() );
+ alp::foldl( alpha, norm_v0, divide );
+ alp::foldl( val_scalar, alpha, minus );
+ val = *val_scalar;
+ }
+ },
+ v
+ );
+ if( rc != SUCCESS ) {
+ std::cerr << " alp::eWiseLambda( lambda, v ) failed\n";
+ return rc;
+ }
+
+ Scalar< D > norm_v( zero );
+ rc = alp::norm2( norm_v, v, ring );
+ if( rc != SUCCESS ) {
+ std::cerr << " alp::norm2( norm_v, v, ring ) failed\n";
+ return rc;
+ }
+
+ rc = alp::foldl( v, norm_v, divide );
+#ifdef DEBUG
+ print_vector( " v = ", v );
+#endif
+ // ===== End Computing v =====
+
+ // ===== Calculate reflector Qk =====
+ // Q_k = identity( n )
+ typedef typename std::conditional<
+ grb::utils::is_complex< D >::value,
+ structures::Hermitian,
+ structures::Symmetric
+ >::type SymmOrHerm;
+ Matrix< D, SymmOrHerm, alp::Dense > Qk( n );
+ rc = alp::set( Qk, zero );
+ auto Qk_diag = alp::get_view< alp::view::diagonal >( Qk );
+ rc = rc ? rc : alp::set( Qk_diag, one );
+
+ // this part can be rewritten without temp matrix using functors
+ Matrix< D, SymmOrHerm, alp::Dense > vvt( n - k );
+
+ rc = rc ? rc : alp::set( vvt, alp::outer( v, ring.getMultiplicativeOperator() ) );
+ rc = rc ? rc : alp::foldr( Scalar< D >( 2 ), vvt, ring.getMultiplicativeOperator() );
+
+ // Qk = Qk - vvt ( expanded: I - 2 * vvt )
+ auto Qk_view = alp::get_view< SymmOrHerm >(
+ //auto Qk_view = alp::get_view< GeneralType >(
+ Qk,
+ utils::range( k, n ),
+ utils::range( k, n )
+ );
+ rc = rc ? rc : alp::foldl( Qk_view, vvt, minus );
+
+#ifdef DEBUG
+ print_matrix( " << Qk >> ", Qk );
+#endif
+ // ===== End of Calculate reflector Qk ====
+
+ // ===== Update RR =====
+ // RR = Qk * RR
+
+ // QkRR = Qk * RR
+ Matrix< D, GeneralType, alp::Dense > QkRR( n, m );
+ rc = rc ? rc : alp::set( QkRR, zero );
+ rc = rc ? rc : alp::mxm( QkRR, Qk, RR, ring );
+ if( rc != SUCCESS ) {
+ std::cerr << " alp::mxm( QkRR, Qk, RR, ring ); failed\n";
+ return rc;
+ }
+#ifdef DEBUG
+ print_matrix( " << Qk x RR >> ", QkRR );
+#endif
+ rc = rc ? rc : alp::set( RR, QkRR );
+
+#ifdef DEBUG
+ print_matrix( " << RR( updated ) >> ", RR );
+#endif
+ // ===== End of Update R =====
+
+ // ===== Update Q =====
+ // Q = Q * conjugate(transpose(Qk))
+
+ // Qtmp = Q * conjugate(transpose(Qk))
+ rc = rc ? rc : alp::set( Qtmp, zero );
+ if( grb::utils::is_complex< D >::value ) {
+ rc = rc ? rc : alp::mxm(
+ Qtmp,
+ Q,
+ alp::conjugate( alp::get_view< alp::view::transpose >( Qk ) ),
+ ring
+ );
+ } else {
+ rc = rc ? rc : alp::mxm( Qtmp, Q, Qk, ring );
+ }
+
+ // Q = Qtmp
+ rc = rc ? rc : alp::set( Q, Qtmp );
+#ifdef DEBUG
+ print_matrix( " << Q updated >> ", Q );
+#endif
+ // ===== End of Update Q =====
+ }
+
+ // R = RR
+ rc = rc ? rc : alp::set( R, RR );
+ return rc;
+ }
+ } // namespace algorithms
+} // namespace alp
diff --git a/include/alp/algorithms/householder_tridiag.hpp b/include/alp/algorithms/householder_tridiag.hpp
new file mode 100644
index 000000000..3d946f34c
--- /dev/null
+++ b/include/alp/algorithms/householder_tridiag.hpp
@@ -0,0 +1,229 @@
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <sstream>
+
+#include <alp.hpp>
+#include <graphblas/utils/iscomplex.hpp> // use from grb
+#include "../tests/utils/print_alp_containers.hpp"
+
+namespace alp {
+
+ namespace algorithms {
+
+ /**
+ * @brief Computes Householder tridiagonalization \f$H = QTQ^T\f$
+ * where \a H is real symmetric, \a T is symmetric tridiagonal, and
+ * \a Q is orthogonal.
+ *
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @tparam Minus Type minus operator used in the computation
+ * @tparam Divide Type of divide operator used in the computation
+ * @param[out] Q output orthogonal matrix such that H = Q T Q^T
+ * @param[out] T output symmetric tridiagonal matrix such that H = Q T Q^T
+ * @param[in] H input symmetric matrix
+ * @param[in] ring A semiring for operations
+ * @return RC SUCCESS if the execution was correct
+ *
+ */
+ template<
+ typename D,
+ typename SymmOrHermType,
+ typename SymmOrHermTridiagonalType,
+ typename OrthogonalType,
+ class Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ class Minus = operators::subtract< D >,
+ class Divide = operators::divide< D >
+ >
+ RC householder_tridiag(
+ Matrix< D, OrthogonalType, Dense > &Q,
+ Matrix< D, SymmOrHermTridiagonalType, Dense > &T,
+ Matrix< D, SymmOrHermType, Dense > &H,
+ const Ring & ring = Ring(),
+ const Minus & minus = Minus(),
+ const Divide & divide = Divide() ) {
+
+ RC rc = SUCCESS;
+
+ const Scalar< D > zero( ring.template getZero< D >() );
+ const Scalar< D > one( ring.template getOne< D >() );
+ const size_t n = nrows( H );
+
+ // Q = identity( n )
+ rc = alp::set( Q, zero );
+ auto Qdiag = alp::get_view< alp::view::diagonal >( Q );
+ rc = rc ? rc : alp::set( Qdiag, one );
+ if( rc != SUCCESS ) {
+ std::cerr << " set( Q, I ) failed\n";
+ return rc;
+ }
+
+ // Out of place specification of the computation
+ Matrix< D, SymmOrHermType, Dense > RR( n );
+
+ rc = set( RR, H );
+ if( rc != SUCCESS ) {
+ std::cerr << " set( RR, H ) failed\n";
+ return rc;
+ }
+#ifdef DEBUG
+ print_matrix( " << RR >> ", RR );
+#endif
+
+ // a temporary for storing the mxm result
+ Matrix< D, OrthogonalType, Dense > Qtmp( n, n );
+
+ for( size_t k = 0; k < n - 2; ++k ) {
+#ifdef DEBUG
+ std::string matname(" << RR(");
+ matname = matname + std::to_string(k);
+ matname = matname + std::string( ") >> ");
+ print_matrix( matname , RR );
+#endif
+
+ const size_t m = n - k - 1;
+
+ // ===== Begin Computing v =====
+ // v = H[ k + 1 : , k ]
+ // alpha = norm( v ) * v[ 0 ] / norm( v[ 0 ] )
+ // v = v - alpha * e1
+ // v = v / norm ( v )
+
+ auto v_view = get_view( RR, k, utils::range( k + 1, n ) );
+ Vector< D, structures::General, Dense > v( n - ( k + 1 ) );
+ rc = set( v, v_view );
+ if( rc != SUCCESS ) {
+ std::cerr << " set( v, view ) failed\n";
+ return rc;
+ }
+
+ Scalar< D > alpha( zero );
+ rc = norm2( alpha, v, ring );
+ if( rc != SUCCESS ) {
+ std::cerr << " norm2( alpha, v, ring ) failed\n";
+ return rc;
+ }
+
+ rc = eWiseLambda(
+					[ &alpha, &ring, &divide, &minus ]( const size_t i, D &val ) {
+ if ( i == 0 ) {
+ Scalar< D > norm_v0( std::abs( val ) );
+ Scalar< D > val_scalar( val );
+ foldl( alpha, val_scalar, ring.getMultiplicativeOperator() );
+ foldl( alpha, norm_v0, divide );
+ foldl( val_scalar, alpha, minus );
+ val = *val_scalar;
+ }
+ },
+ v
+ );
+ if( rc != SUCCESS ) {
+ std::cerr << " eWiseLambda( lambda, v ) failed\n";
+ return rc;
+ }
+
+ Scalar< D > norm_v( zero );
+ rc = norm2( norm_v, v, ring );
+ if( rc != SUCCESS ) {
+ std::cerr << " norm2( norm_v, v, ring ) failed\n";
+ return rc;
+ }
+
+ rc = foldl(v, norm_v, divide );
+#ifdef DEBUG
+ print_vector( " v = ", v );
+#endif
+ // ===== End Computing v =====
+
+ // ===== Calculate reflector Qk =====
+ // Q_k = identity( n )
+ Matrix< D, SymmOrHermType, Dense > Qk( n );
+ rc = alp::set( Qk, zero );
+ auto Qk_diag = alp::get_view< alp::view::diagonal >( Qk );
+ rc = rc ? rc : alp::set( Qk_diag, one );
+
+				// this part can be rewritten without temp matrix using functors
+ Matrix< D, SymmOrHermType, Dense > vvt( m );
+
+ rc = rc ? rc : set( vvt, outer( v, ring.getMultiplicativeOperator() ) );
+ // vvt = 2 * vvt
+ rc = rc ? rc : foldr( Scalar< D >( 2 ), vvt, ring.getMultiplicativeOperator() );
+
+
+#ifdef DEBUG
+ print_matrix( " vvt ", vvt );
+#endif
+
+ // Qk = Qk - vvt ( expanded: I - 2 * vvt )
+ auto Qk_view = get_view< SymmOrHermType >( Qk, utils::range( k + 1, n ), utils::range( k + 1, n ) );
+ if ( grb::utils::is_complex< D >::value ) {
+ rc = rc ? rc : foldl( Qk_view, alp::get_view< alp::view::transpose >( vvt ), minus );
+ } else {
+ rc = rc ? rc : foldl( Qk_view, vvt, minus );
+ }
+
+#ifdef DEBUG
+ print_matrix( " << Qk >> ", Qk );
+#endif
+ // ===== End of Calculate reflector Qk ====
+
+ // ===== Update R =====
+ // Rk = Qk * Rk * Qk
+
+ // RRQk = RR * Qk
+ Matrix< D, structures::Square, Dense > RRQk( n );
+ rc = rc ? rc : set( RRQk, zero );
+ rc = rc ? rc : mxm( RRQk, RR, Qk, ring );
+ if( rc != SUCCESS ) {
+ std::cerr << " mxm( RRQk, RR, Qk, ring ); failed\n";
+ return rc;
+ }
+#ifdef DEBUG
+ print_matrix( " << RR x Qk = >> ", RRQk );
+#endif
+ // RR = Qk * RRQk
+ rc = rc ? rc : set( RR, zero );
+ rc = rc ? rc : mxm( RR, Qk, RRQk, ring );
+
+#ifdef DEBUG
+ print_matrix( " << RR( updated ) >> ", RR );
+#endif
+ // ===== End of Update R =====
+
+ // ===== Update Q =====
+ // Q = Q * Qk
+
+ // Qtmp = Q * Qk
+ rc = rc ? rc : set( Qtmp, zero );
+ rc = rc ? rc : mxm( Qtmp, Q, Qk, ring );
+
+ // Q = Qtmp
+ rc = rc ? rc : set( Q, Qtmp );
+#ifdef DEBUG
+ print_matrix( " << Q updated >> ", Q );
+#endif
+ // ===== End of Update Q =====
+ }
+
+ // T = RR
+
+ rc = rc ? rc : set( T, get_view< SymmOrHermTridiagonalType > ( RR ) );
+ return rc;
+ }
+ } // namespace algorithms
+} // namespace alp
diff --git a/include/alp/algorithms/qr_eigensolver.hpp b/include/alp/algorithms/qr_eigensolver.hpp
new file mode 100644
index 000000000..d888e6a5f
--- /dev/null
+++ b/include/alp/algorithms/qr_eigensolver.hpp
@@ -0,0 +1,213 @@
+/*
+ * Copyright 2022 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <sstream>
+
+#include <alp.hpp>
+#include <graphblas/utils/iscomplex.hpp> // use from grb
+#include <alp/algorithms/householder_qr.hpp>
+#ifdef DEBUG
+#include "../tests/utils/print_alp_containers.hpp"
+#endif
+
+// TEMPDISABLE should be removed in the final version
+#define TEMPDISABLE
+
+
+namespace alp {
+
+ namespace algorithms {
+
+ /**
+ * Calculate eigendecomposition of square matrix T
+ * \f$T = Qdiag(d)Q^T\f$ where
+ * \a T is real
+ * \a Q is orthogonal (columns are eigenvectors).
+ * \a d is vector containing eigenvalues.
+ *
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @tparam Minus Type of minus operator used in the computation
+ * @tparam Divide Type of divide operator used in the computation
+		 * @param[out] Q output orthogonal matrix containing eigenvectors
+		 * @param[out] d output vector containing eigenvalues
+ * @param[in] T input symmetric tridiagonal matrix
+ * @param[in] ring A semiring for operations
+ * @return RC SUCCESS if the execution was correct
+ *
+ */
+ template<
+ typename MatA,
+ typename MatQ,
+ typename Vec,
+ typename D = typename MatA::value_type,
+ class Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ class Minus = operators::subtract< D >,
+ class Divide = operators::divide< D >
+ >
+ RC qr_eigensolver(
+ MatA &A,
+ MatQ &Q,
+ Vec &d,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+			const Divide &divide = Divide()
+ ) {
+ (void) ring;
+ (void) minus;
+ (void) divide;
+
+ const size_t max_it = 1.e+7;
+ const D tol = 1.e-6;
+
+ const Scalar< D > zero( ring.template getZero< D >() );
+ const Scalar< D > one( ring.template getOne< D >() );
+
+ RC rc = SUCCESS;
+
+ rc = rc ? rc : alp::set( d, zero );
+
+ const size_t n = nrows( A );
+
+ alp::Matrix< D, structures::General > Atmp( n, n );
+ rc = rc ? rc : alp::set( Atmp, zero );
+
+ // auto A_diag = alp::get_view< alp::view::diagonal >( A );
+
+ auto A_tmp_orig_view = alp::get_view< typename MatA::structure >( Atmp );
+
+ auto A_tmp_diag = alp::get_view< alp::view::diagonal >( Atmp );
+
+ auto A_tmp_supsquare = alp::get_view< alp::structures::Square >( Atmp, utils::range( 0, n - 1 ), utils::range( 1, n ) );
+ auto A_tmp_supdiag = alp::get_view< alp::view::diagonal >( A_tmp_supsquare );
+
+ auto A_tmp_subsquare = alp::get_view< alp::structures::Square >( Atmp, utils::range( 1, n ), utils::range( 0, n - 1 ) );
+ auto A_tmp_subdiag = alp::get_view< alp::view::diagonal >( A_tmp_subsquare );
+
+ rc = rc ? rc : alp::set( A_tmp_orig_view, A );
+ rc = rc ? rc : alp::set( A_tmp_subdiag, A_tmp_supdiag );
+
+// //#ifdef DEBUG
+// print_matrix( " A(input) = ", A );
+// print_matrix( " Atmp = ", Atmp );
+// //#endif
+
+ rc = rc ? rc : alp::set( Q, zero );
+ auto Q_diag = alp::get_view< alp::view::diagonal >( Q );
+ rc = rc ? rc : alp::set(
+ Q_diag,
+ one
+ );
+
+ alp::Matrix< D, structures::Orthogonal > qmat( n );
+ alp::Matrix< D, structures::General > rmat( n, n );
+ MatQ Q_tmp( n, n );
+
+ size_t k1 = 0;
+ size_t k2 = n;
+
+ for( size_t i = 0; i < max_it; ++i ) {
+// //#ifdef DEBUG
+// print_vector( " A_tmp_supdiag ", A_tmp_supdiag );
+// //#endif
+
+ Scalar< D > sdiagnorm1( zero );
+ auto sdiag1 = alp::get_view( A_tmp_supdiag, utils::range( k1, k1 + 1 ) );
+ rc = rc ? rc : alp::norm2( sdiagnorm1, sdiag1, ring );
+ if( std::abs( *sdiagnorm1 ) < tol ) {
+ ++k1;
+ }
+ if ( k1 >= k2 - 1 ){
+ break;
+ }
+
+ Scalar< D > sdiagnorm2( zero );
+ auto sdiag2 = alp::get_view( A_tmp_supdiag, utils::range( k2 - 2, k2 - 1 ) );
+ rc = rc ? rc : alp::norm2( sdiagnorm2, sdiag2, ring );
+ if( std::abs( *sdiagnorm2 ) < tol ) {
+ --k2;
+ }
+ if ( k1 >= k2 - 1 ){
+ break;
+ }
+
+ if( ( k2 - k1 ) != n ) {
+ auto A_tmp_subprob = alp::get_view( Atmp, utils::range( k1, k2 ), utils::range( k1, k2 ) );
+ MatQ qmat2( k2 - k1 );
+ MatA A_sub_mat( k2 - k1 );
+ Vec d_tmp( k2 - k1 );
+ rc = rc ? rc : alp::set( A_sub_mat, zero );
+ auto view_t1 = alp::get_view< typename MatA::structure >( A_tmp_subprob );
+ rc = rc ? rc : alp::set( A_sub_mat, view_t1 );
+ rc = rc ? rc : alp::set( d_tmp, zero );
+// //#ifdef DEBUG
+// print_matrix( " Atmp ", Atmp );
+// print_matrix( " A_tmp_subprob ", A_tmp_subprob );
+// print_matrix( " A_sub_mat ", A_sub_mat );
+// //#endif
+ rc = rc ? rc : alp::set( qmat2, zero );
+ rc = rc ? rc : alp::algorithms::qr_eigensolver( A_sub_mat, qmat2, d_tmp );
+// // #ifdef DEBUG
+// std::cout << " d_tmp : \n";
+// print_vector( " ---> d_tmp ", d_tmp );
+// // #endif
+
+ //Q[:,k1:k2]=Q[:,k1:k2].dot(q1)
+ auto Q_update_view = alp::get_view< structures::OrthogonalColumns >( Q, utils::range( 0, n ), utils::range( k1, k2 ) );
+ alp::Matrix< D, structures::OrthogonalColumns > Q_tmp2( n, k2 - k1 );
+ rc = rc ? rc : alp::set( Q_tmp2, Q_update_view );
+ rc = rc ? rc : alp::set( Q_update_view, zero );
+ rc = rc ? rc : alp::mxm( Q_update_view, Q_tmp2, qmat2, ring );
+
+ rc = rc ? rc : alp::set( A_tmp_subprob, zero );
+ auto A_tmp_diag_update = alp::get_view< alp::view::diagonal >( A_tmp_subprob );
+ rc = rc ? rc : alp::set( A_tmp_diag_update, d_tmp );
+
+ break;
+ } else {
+
+ rc = rc ? rc : alp::set( qmat, zero );
+ rc = rc ? rc : alp::set( rmat, zero );
+ rc = rc ? rc : alp::algorithms::householder_qr( Atmp, qmat, rmat, ring );
+
+ rc = rc ? rc : alp::set( Q_tmp, Q );
+ rc = rc ? rc : alp::set( Q, zero );
+ rc = rc ? rc : alp::mxm( Q, Q_tmp, qmat, ring );
+ rc = rc ? rc : alp::set( Atmp, zero );
+ rc = rc ? rc : alp::mxm( Atmp, rmat, qmat, ring );
+
+ }
+
+// //#ifdef DEBUG
+// print_matrix( " Atmp ", Atmp );
+// //#endif
+
+// if( i % ( n ) == 0 ) {
+ Scalar< D > supdiagnorm( zero );
+ rc = rc ? rc : alp::norm2( supdiagnorm, A_tmp_supdiag, ring );
+ if( std::abs( *supdiagnorm ) < tol * tol ) {
+ break;
+ }
+// }
+ }
+
+ rc = rc ? rc : alp::set( d, A_tmp_diag );
+
+ return rc;
+ }
+ } // namespace algorithms
+} // namespace alp
diff --git a/include/alp/algorithms/svd.hpp b/include/alp/algorithms/svd.hpp
new file mode 100644
index 000000000..d008cd221
--- /dev/null
+++ b/include/alp/algorithms/svd.hpp
@@ -0,0 +1,478 @@
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <sstream>
+
+#include <alp.hpp>
+#include <graphblas/utils/iscomplex.hpp> // use from grb
+#include <alp/algorithms/householder_bidiag.hpp>
+#ifdef DEBUG
+#include "../tests/utils/print_alp_containers.hpp"
+#endif
+
+namespace alp {
+
+ namespace algorithms {
+
+
+ /**
+ * Calculate Givens rotation 2x2 matrix elements and overwrite
+ * the content of matrix G. Givens rotation elements G=[[c,-s*],[s,c]] are determined by
+ * input vector v=[a,b], so that G v = [r,0]
+ */
+ // for a more general purpose
+ // a more stable implementations is needed
+ // todo: move to utils?
+ template<
+ typename MatG,
+ typename VecV,
+ typename D = typename MatG::value_type,
+ class Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ class Minus = operators::subtract< D >,
+ class Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_matrix< MatG >::value &&
+ is_vector< VecV >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value
+ > * = nullptr
+ >
+ RC Givens(
+ MatG &G,
+ VecV &v,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+			const Divide &divide = Divide()
+ ) {
+ (void) minus;
+ RC rc = SUCCESS;
+
+#ifdef DEBUG
+ if( ( nrows( G ) != 2 ) ||
+ ( ncols( G ) != 2 ) ||
+ ( size( v ) != 2 )
+ ) {
+ std::cerr << "Wrong size in Givens.";
+ return FAILED;
+ }
+#endif
+
+ const Scalar< D > zero( ring.template getZero< D >() );
+ const Scalar< D > one( ring.template getOne< D >() );
+
+ // c = abs(a) / sqrt(abs(a)**2 + abs(b)**2)
+ // s = (a/abs(a)) * conjugate(b) / sqrt(abs(a)**2 + abs(b)**2)
+ // return(array([[c,-conjugate(s)],[(s),c]]))
+ Scalar< D > c( zero );
+ Scalar< D > s( zero );
+ Scalar< D > d( zero );
+ rc = rc ? rc : alp::norm2( d, v, ring );
+ auto a = get_view( v, utils::range( 0, 1 ) );
+ auto b = get_view( v, utils::range( 1, 2 ) );
+
+ rc = rc ? rc : alp::norm2( c, a, ring );
+ rc = rc ? rc : alp::foldl( s, a, ring.getAdditiveMonoid() );
+
+ rc = rc ? rc : alp::foldl( s, c, divide );
+ rc = rc ? rc : alp::foldl( s, conjugate( b ), ring.getMultiplicativeMonoid() );
+
+ // return(array([[c,-conjugate(s)],[(s),c]]),r)
+ auto G11 = get_view( G, 0, utils::range( 0, 1 ) );
+ auto G12 = get_view( G, 0, utils::range( 1, 2 ) );
+ auto G21 = get_view( G, 1, utils::range( 0, 1 ) );
+ auto G22 = get_view( G, 1, utils::range( 1, 2 ) );
+ rc = rc ? rc : alp::set( G, zero );
+ rc = rc ? rc : alp::foldl( G11, c, ring.getAdditiveOperator() );
+ rc = rc ? rc : alp::foldl( G22, c, ring.getAdditiveOperator() );
+ rc = rc ? rc : alp::foldl( G21, s, ring.getAdditiveOperator() );
+ rc = rc ? rc : alp::set( G12, conjugate( G21 ) );
+ rc = rc ? rc : alp::foldl( G12, Scalar< D >( -1 ), ring.getMultiplicativeOperator() );
+ rc = rc ? rc : alp::foldl( G, d, divide );
+ return rc;
+ }
+
+
+ /** Golub-Kahan SVD step */
+ template<
+ typename MatB,
+ typename MatU,
+ typename MatV,
+ typename D = typename MatB::value_type,
+ class Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ class Minus = operators::subtract< D >,
+ class Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_matrix< MatB >::value &&
+ is_matrix< MatU >::value &&
+ is_matrix< MatV >::value &&
+ structures::is_a< typename MatB::structure, structures::General >::value &&
+ structures::is_a< typename MatU::structure, structures::OrthogonalColumns >::value &&
+ structures::is_a< typename MatV::structure, structures::OrthogonalRows >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value
+ > * = nullptr
+ >
+ RC gk_svd_step(
+ MatU &U,
+ MatB &B,
+ MatV &V,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+			const Divide &divide = Divide()
+ ) {
+ RC rc = SUCCESS;
+
+ const Scalar< D > zero( ring.template getZero< D >() );
+ const Scalar< D > one( ring.template getOne< D >() );
+
+ const size_t m = nrows( B );
+ const size_t n = ncols( B );
+ const size_t k = std::min( m, n );
+
+ // get lambda
+ // calculate eigenvalue llambda of
+ // which is closer to t22
+ auto BEnd = get_view( B, utils::range( k - 3, k ), utils::range( k - 2, k ) );
+ Matrix< D, structures::Square, Dense > BEndSquare( 2, 2 );
+ rc = rc ? rc : alp::set( BEndSquare, zero );
+ auto BEndT = get_view< alp::view::transpose >( BEnd );
+ auto BEndT_star = conjugate( BEndT );
+ rc = rc ? rc : mxm( BEndSquare, BEndT_star, BEnd, ring );
+
+ auto tdiag = get_view< alp::view::diagonal >( BEndSquare );
+ auto t11 = get_view( BEndSquare, 0, utils::range( 0, 1 ) );
+ auto t12 = get_view( BEndSquare, 0, utils::range( 1, 2 ) );
+ auto t22 = get_view( BEndSquare, 1, utils::range( 1, 2 ) );
+
+ Scalar< D > llambda( zero );
+ rc = rc ? rc : alp::foldl( llambda, tdiag, ring.getAdditiveMonoid() );
+ rc = rc ? rc : alp::foldl( llambda, alp::Scalar< D >( 2 ), divide );
+
+ Scalar< D > bb( zero );
+ rc = rc ? rc : alp::foldl( bb, t11, ring.getAdditiveMonoid() );
+ rc = rc ? rc : alp::foldl( bb, Scalar< D >( -1 ), ring.getMultiplicativeOperator() );
+ rc = rc ? rc : alp::foldl( bb, t22, ring.getAdditiveMonoid() );
+ rc = rc ? rc : alp::foldl( bb, alp::Scalar< D >( 2 ), divide );
+
+ Scalar< D > cc( zero );
+ rc = rc ? rc : alp::foldl( cc, conjugate( t12 ), ring.getAdditiveMonoid() );
+
+ Vector< D > DD( 2 );
+ rc = rc ? rc : alp::set( DD, zero );
+ auto DD0 = get_view( DD, utils::range( 0, 1 ) );
+ auto DD1 = get_view( DD, utils::range( 1, 2 ) );
+ rc = rc ? rc : alp::foldl( DD0, bb, ring.getAdditiveOperator() );
+ rc = rc ? rc : alp::foldl( DD1, cc, ring.getAdditiveOperator() );
+ rc = rc ? rc : alp::set( bb, zero );
+ rc = rc ? rc : alp::norm2( bb, DD, ring );
+
+ Scalar< D > t11scal( zero );
+ Scalar< D > t22scal( zero );
+ rc = rc ? rc : alp::foldl( t11scal, t11, ring.getAdditiveMonoid() );
+ rc = rc ? rc : alp::foldl( t22scal, t22, ring.getAdditiveMonoid() );
+
+ if ( std::real( *t11scal ) > std::real( *t22scal ) ) {
+ rc = rc ? rc : alp::foldl( llambda, bb, minus );
+ } else {
+ rc = rc ? rc : alp::foldl( llambda, bb, ring.getAdditiveOperator() );
+ }
+ // end of get lambda
+
+ Vector< D > rotvec( 2 );
+ auto Brow = get_view( B, 0, utils::range( 0, 2 ) );
+ auto B00 = get_view( B, 0, utils::range( 0, 1 ) );
+ Scalar< D > b00star( zero );
+ rc = rc ? rc : alp::foldl( b00star, conjugate( B00 ), ring.getAdditiveMonoid() );
+ rc = rc ? rc : alp::set( rotvec, Brow );
+ rc = rc ? rc : alp::foldl( rotvec, b00star, ring.getMultiplicativeOperator() );
+
+ auto rotvec0 = get_view( rotvec, utils::range( 0, 1 ) );
+ rc = rc ? rc : alp::foldl( rotvec0, llambda, minus );
+
+ Matrix< D, structures::Square, Dense > G( 2, 2 );
+ rc = rc ? rc : alp::set( G, zero );
+ rc = rc ? rc : Givens( G, rotvec );
+ auto Gdiag = get_view< alp::view::diagonal >( G );
+ auto Gstar = conjugate( G );
+ auto GT = get_view< alp::view::transpose >( G );
+ auto GTstar = conjugate( GT );
+
+ for( size_t i = 0; i < k - 1; ++i ){
+ // B[max(i-1,0):i+2,i:i+2]=B[max(i-1,0):i+2,i:i+2].dot(G)
+ auto Bblock1 = get_view( B, utils::range( ( i == 0 ? 0 : i - 1 ), i + 2 ), utils::range( i, i + 2 ) );
+ Matrix< D, structures::General, Dense > TMP1( nrows( Bblock1 ), ncols( Bblock1 ) );
+ rc = rc ? rc : alp::set( TMP1, Bblock1 );
+ rc = rc ? rc : alp::set( Bblock1, zero );
+ rc = rc ? rc : mxm( Bblock1, TMP1, G, ring );
+
+ // update V
+ // G2=G-identity(2).astype(complex)
+ rc = rc ? rc : alp::foldl( Gdiag, one, minus );
+ // V[i:i+2,:]=V[i:i+2,:] + conjugate(G2).dot(V[i:i+2,:])
+ auto Vstrip = get_view< structures::OrthogonalRows >( V, utils::range( i, i + 2 ), utils::range( 0, ncols( V ) ) );
+ Matrix< D, structures::OrthogonalRows, Dense > TMPStrip1( nrows( Vstrip ), ncols( Vstrip ) );
+ rc = rc ? rc : alp::set( TMPStrip1, Vstrip );
+ rc = rc ? rc : mxm( Vstrip, GTstar, TMPStrip1, ring );
+
+ // B[i:i+2,i:i+3]=G.T.dot(B[i:i+2,i:i+3])
+ auto Bblock2 = get_view( B, utils::range( i, i + 2 ), utils::range( i, std::min( i + 3, n ) ) );
+ Matrix< D, structures::General, Dense > TMP2( nrows( Bblock2 ), ncols( Bblock2 ) );
+ auto rotvec2 = get_view( B, utils::range( i, i + 2 ), i );
+ rc = rc ? rc : Givens( G, rotvec2 );
+ rc = rc ? rc : alp::set( TMP2, Bblock2 );
+ rc = rc ? rc : alp::set( Bblock2, zero );
+ rc = rc ? rc : mxm( Bblock2, GT, TMP2, ring );
+
+ // update U
+ // G2=G-identity(2).astype(complex)
+ rc = rc ? rc : alp::foldl( Gdiag, one, minus );
+ // U[:,k:k+2]=U[:,k:k+2]+U[:,k:k+2].dot(conjugate(G2))
+ auto Ustrip = get_view< structures::OrthogonalColumns >( U, utils::range( 0, nrows( U ) ), utils::range( i, i + 2 ) );
+ Matrix< D, structures::OrthogonalColumns, Dense > TMPStrip2( nrows( Ustrip ), ncols( Ustrip ) );
+ rc = rc ? rc : alp::set( TMPStrip2, Ustrip );
+ rc = rc ? rc : mxm( Ustrip, TMPStrip2, Gstar, ring );
+
+ if( i + 2 < k ) {
+ auto rotvec3 = get_view( B, i, utils::range( i + 1, i + 3 ) );
+ rc = rc ? rc : Givens( G, rotvec3 );
+ } else {
+ rc = rc ? rc : Givens( G, rotvec2 );
+ }
+ }
+
+ return rc;
+ }
+
+		/** Golub-Kahan SVD algorithm */
+ template<
+ typename MatB,
+ typename MatU,
+ typename MatV,
+ typename D = typename MatB::value_type,
+ class Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ class Minus = operators::subtract< D >,
+ class Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_matrix< MatB >::value &&
+ is_matrix< MatU >::value &&
+ is_matrix< MatV >::value &&
+ structures::is_a< typename MatB::structure, structures::General >::value &&
+ structures::is_a< typename MatU::structure, structures::Orthogonal >::value &&
+ structures::is_a< typename MatV::structure, structures::Orthogonal >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value
+ > * = nullptr
+ >
+ RC svd_solve(
+ MatU &U,
+ MatB &B,
+ MatV &V,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+			const Divide &divide = Divide()
+ ) {
+ RC rc = SUCCESS;
+
+ const Scalar< D > zero( ring.template getZero< D >() );
+ const Scalar< D > one( ring.template getOne< D >() );
+
+ const size_t m = nrows( B );
+ const size_t n = ncols( B );
+ const size_t k = std::min( m, n );
+
+ const double tol = 1.e-12;
+ const size_t maxit = k * 5;
+
+ auto Bsupsquare = get_view( B, utils::range( 0, k - 1 ) , utils::range( 1, k ) );
+ auto superdiagonal = get_view< alp::view::diagonal >( Bsupsquare );
+
+ size_t i1 = 0;
+ size_t i2 = k;
+
+ rc = rc ? rc : algorithms::householder_bidiag( U, B, V, ring, minus, divide );
+
+ // eliminate superdiagonal elements via Givens rotations
+ for( size_t i = 0; i < maxit; ++i ) {
+ // todo: In convergence test: replace absolute with relative tolerance check
+				// todo: check for zeroes in diagonal, if any do Givens rotations
+				//       to move the zero from diagonal to superdiagonal
+				//       (not likely to affect randomly generated tests)
+
+ // check for zeros in superdiagonal, if any,
+ // move i1 and i2 to bound non-zero part of superdiagonal
+ for( ; i1 < i2; ++i1 ) {
+ auto B_l = get_view( superdiagonal, utils::range( i1, i1 + 1 ) );
+ Scalar< D > bnorm( zero );
+ rc = rc ? rc : alp::norm2( bnorm, B_l, ring );
+ if( std::abs( *bnorm ) > tol ) {
+ break;
+ }
+ }
+ for( ; i2 > i1; --i2 ) {
+ auto B_l = get_view( superdiagonal, utils::range( i2 - 2, i2 - 1 ) );
+ Scalar< D > bnorm( zero );
+ rc = rc ? rc : alp::norm2( bnorm, B_l, ring );
+ if( std::abs( *bnorm ) > tol ) {
+ break;
+ }
+ }
+ if( i2 <= i1 ){
+ break;
+ }
+
+ auto Bview = get_view( B, utils::range( i1, i2 ), utils::range( i1, i2 ) );
+ auto Uview = get_view< structures::OrthogonalColumns >( U, utils::range( 0, nrows( U ) ), utils::range( i1, i2 ) );
+ auto Vview = get_view< structures::OrthogonalRows >( V, utils::range( i1, i2 ), utils::range( 0, ncols( V ) ) );
+
+ rc = rc ? rc : algorithms::gk_svd_step( Uview, Bview, Vview, ring, minus, divide );
+
+ // check convergence
+ Scalar< D > sup_diag_norm( zero );
+ rc = rc ? rc : alp::norm2( sup_diag_norm, superdiagonal, ring );
+
+ if( std::abs( *sup_diag_norm ) < tol ) {
+ break ;
+ }
+ }
+
+ // Rotate diagonal elements in complex plane
+ // in order to have them on real axis (positive singular values)
+ auto BSquare = alp::get_view( B, utils::range( 0, k ), utils::range( 0, k ) );
+ auto DiagBview = alp::get_view< alp::view::diagonal >( BSquare );
+ for( size_t i = 0; i < size( DiagBview ); ++i ) {
+ Scalar< D > sigmaiphase( zero );
+ Scalar< D > sigmainorm( zero );
+ auto U_vi = get_view( U, utils::range( 0, nrows( U ) ), i );
+ auto B_vi = get_view( B, i, utils::range( 0, ncols( B ) ) );
+ auto d_i = get_view( DiagBview, utils::range( i, i + 1 ) );
+ rc = rc ? rc : alp::norm2( sigmainorm, d_i, ring );
+ if( std::abs( *sigmainorm ) > tol ) {
+ rc = rc ? rc : alp::foldl( sigmaiphase, d_i, ring.getAdditiveMonoid() );
+ rc = rc ? rc : alp::foldl( sigmaiphase, sigmainorm, divide );
+ rc = rc ? rc : alp::foldl( U_vi, sigmaiphase, ring.getMultiplicativeOperator() );
+ rc = rc ? rc : alp::foldl( B_vi, sigmaiphase, divide );
+ }
+ }
+
+ return rc;
+ }
+
+
+
+ /**
+ * Computes singular value decomposition (inplace) of a
+ * general matrix \f$H(input) = U B V \f$
+ * where \a H is general (complex or real),
+ * \a U orthogonal and \a V are orthogonal, \a B is nonzero only on diagonal
+ * and it contains positive singular values.
+		 * If convergence is not reached B will contain nonzeros on superdiagonal.
+ *
+ * @tparam MatH type of general matrix on which we perform SVD
+ * @tparam MatU type of orthogonal matrix U, output of SVD
+		 * @tparam MatS type of rectangular diagonal matrix S,
+ * output which has positive nonzero elements on diagonal
+ * @tparam MatV type of orthogonal matrix V, output of SVD
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @tparam Minus Type minus operator used in the computation
+ * @tparam Divide Type of divide operator used in the computation
+ * @param[out] U orthogonal matrix
+ * @param[out] V orthogonal matrix
+ * @param[in,out] B input general matrix, output bidiagonal matrix
+ * @param[in] ring A semiring for operations
+ * @return RC SUCCESS if the execution was correct
+ *
+ */
+ template<
+ typename MatH,
+ typename MatU,
+ typename MatS,
+ typename MatV,
+ typename D = typename MatH::value_type,
+ class Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ class Minus = operators::subtract< D >,
+ class Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_matrix< MatH >::value &&
+ is_matrix< MatU >::value &&
+ is_matrix< MatS >::value &&
+ is_matrix< MatV >::value &&
+ structures::is_a< typename MatH::structure, structures::General >::value &&
+ structures::is_a< typename MatU::structure, structures::Orthogonal >::value &&
+ structures::is_a< typename MatS::structure, structures::RectangularDiagonal >::value &&
+ structures::is_a< typename MatV::structure, structures::Orthogonal >::value &&
+ is_semiring< Ring >::value &&
+ is_operator< Minus >::value &&
+ is_operator< Divide >::value
+ > * = nullptr
+ >
+ RC svd(
+ const MatH &H,
+ MatU &U,
+ MatS &S,
+ MatV &V,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+			const Divide &divide = Divide()
+ ) {
+ RC rc = SUCCESS;
+
+ const Scalar< D > zero( ring.template getZero< D >() );
+ const Scalar< D > one( ring.template getOne< D >() );
+
+ const size_t m = nrows( H );
+ const size_t n = ncols( H );
+
+ // inplace work on B
+ MatH B( m, n );
+ rc = rc ? rc : set( B, H );
+
+ rc = rc ? rc : set( U, zero );
+ rc = rc ? rc : set( V, zero );
+
+ // set U to Identity
+ auto DiagU = alp::get_view< alp::view::diagonal >( U );
+ rc = rc ? rc : alp::set( U, zero );
+ rc = rc ? rc : alp::set( DiagU, one );
+ // set V to Identity
+ auto DiagV = alp::get_view< alp::view::diagonal >( V );
+ rc = rc ? rc : alp::set( V, zero );
+ rc = rc ? rc : alp::set( DiagV, one );
+
+ if( n > m ) {
+ auto UT = get_view< alp::view::transpose >( U );
+ auto BT = get_view< alp::view::transpose >( B );
+ auto VT = get_view< alp::view::transpose >( V );
+ rc = rc ? rc : algorithms::svd_solve( VT, BT, UT, ring, minus, divide );
+ } else {
+ rc = rc ? rc : algorithms::svd_solve( U, B, V, ring, minus, divide );
+ }
+
+ // update S
+ auto DiagS = alp::get_view< alp::view::diagonal >( S );
+ auto DiagB = alp::get_view< alp::view::diagonal >( B );
+ rc = rc ? rc : set( S, zero );
+ rc = rc ? rc : set( DiagS, DiagB );
+
+ return rc;
+ }
+
+ } // namespace algorithms
+} // namespace alp
diff --git a/include/alp/algorithms/symherm_posdef_inverse.hpp b/include/alp/algorithms/symherm_posdef_inverse.hpp
new file mode 100644
index 000000000..e9ba929df
--- /dev/null
+++ b/include/alp/algorithms/symherm_posdef_inverse.hpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2022 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+
+namespace alp {
+
+ namespace algorithms {
+
+ /**
+ * Computes the inverse of a real symmetric positive definite (SPD)
+ * (or complex Hermitian positive definite) matrix H via its Cholesky
+ * factorization U^HU = H, where \a U is upper triangular, and ^H is
+ * transpose in the real case and transpose + complex conjugate otherwise.
+ *
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @param[out] Hinv output inverse of the input matrix H
+ * @param[in] H input real symmetric positive definite matrix
+ * or complex hermitian positive definite matrix
+ * @param[in] ring The semiring used in the computation
+ * @return RC SUCCESS if the execution was correct
+ *
+ */
+ template<
+ typename MatH,
+ typename D = typename MatH::value_type,
+ typename Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ std::enable_if_t<
+ is_matrix< MatH >::value &&
+ (
+ (
+ !grb::utils::is_complex< D >::value &&
+ structures::is_a< typename MatH::structure, structures::SymmetricPositiveDefinite >::value
+ ) || (
+ grb::utils::is_complex< D >::value &&
+ structures::is_a< typename MatH::structure, structures::HermitianPositiveDefinite >::value
+ )
+ ) &&
+ is_semiring< Ring >::value
+ > * = nullptr
+ >
+ RC symherm_posdef_inverse(
+ MatH &Hinv,
+ const MatH &H,
+ const Ring &ring = Ring()
+ ) {
+ RC rc = SUCCESS;
+
+ const alp::Scalar< D > zero( ring.template getZero< D >() );
+ const alp::Scalar< D > one( ring.template getOne< D >() );
+
+ if( nrows( Hinv ) != nrows( H ) ) {
+ std::cerr << "Incompatible sizes in symherm_posdef_inverse.\n";
+ return FAILED;
+ }
+
+ const size_t N = nrows( H );
+
+ alp::Matrix< D, structures::UpperTriangular, Dense > U( N );
+
+ rc = rc ? rc : alp::set( U, zero );
+
+ rc = rc ? rc : algorithms::cholesky_uptr( U, H, ring );
+#ifdef DEBUG
+ print_matrix( " U ", U );
+#endif
+ // H = U^H U
+ // H^-1 = U^-1 U^H-1
+ alp::Matrix< D, structures::UpperTriangular, Dense > Uinv( N );
+ rc = rc ? rc : alp::set( Uinv, zero );
+ auto Uinvdiag = alp::get_view< alp::view::diagonal >( Uinv );
+ auto UinvT = alp::get_view< alp::view::transpose >( Uinv );
+ rc = rc ? rc : alp::set( Uinvdiag, one );
+ auto UT = alp::get_view< alp::view::transpose >( U );
+ for( size_t i = 0; i < N; ++i ){
+ auto x = alp::get_view( UinvT, utils::range( i, N ), i );
+ auto UT_submatview = alp::get_view( UT, utils::range( i, N ), utils::range( i, N ) );
+ rc = rc ? rc : alp::algorithms::forwardsubstitution( UT_submatview, x, ring );
+ }
+#ifdef DEBUG
+ print_matrix( " Uinv ", Uinv );
+#endif
+ rc = rc ? rc : alp::set( Hinv, zero );
+ // conjugate(linv.T).dot(linv)
+ auto UinvTvstar = conjugate( UinvT );
+ rc = rc ? rc : alp::mxm( Hinv, Uinv, UinvTvstar, ring );
+#ifdef DEBUG
+ print_matrix( " Hinv ", Hinv );
+#endif
+ return rc;
+ }
+
+ } // namespace algorithms
+} // namespace alp
diff --git a/include/alp/algorithms/symm_tridiag_eigensolver.hpp b/include/alp/algorithms/symm_tridiag_eigensolver.hpp
new file mode 100644
index 000000000..e9e29d2ea
--- /dev/null
+++ b/include/alp/algorithms/symm_tridiag_eigensolver.hpp
@@ -0,0 +1,539 @@
+/*
+ * Copyright 2022 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+#include
+#include // use from grb
+#ifdef DEBUG
+#include "../tests/utils/print_alp_containers.hpp"
+#endif
+
+// TEMPDISABLE should be removed in the final version
+#define TEMPDISABLE
+
+
+namespace alp {
+
+ namespace algorithms {
+
+ /**
+ * find zero of secular equation in interval
+ * using bisection
+ * this is not an optimal algorithm and there are many
+ * more efficient implementations
+ */
+ template<
+ typename D,
+ typename VectorD,
+ typename VectorV,
+ class Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ class Minus = operators::subtract< D >,
+ class Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_vector< VectorD >::value &&
+ is_vector< VectorV >::value
+ > * = nullptr
+ >
+ RC bisec_sec_eq(
+ Scalar< D > &lambda,
+ const VectorD &d,
+ // Vector v should be const, but that would disable eWiseLambda,
+ // to be resolved in the future
+ VectorV &v,
+ const Scalar< D > &a,
+ const Scalar< D > &b,
+ const D tol = 1.e-10,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+ const Divide &divide = Divide()
+ ) {
+ RC rc = SUCCESS;
+
+ const Scalar< D > zero( ring.template getZero< D >() );
+ const Scalar< D > one( ring.template getOne< D >() );
+
+ Scalar< D > x0( a );
+ rc = rc ? rc : alp::foldl( x0, b, ring.getAdditiveOperator() );
+ rc = rc ? rc : alp::foldl( x0, Scalar< D >( 2 ), divide );
+
+ Scalar< D > delta( a );
+ rc = rc ? rc : alp::foldl( delta, b, minus );
+ *delta = std::abs( *delta );
+
+ if( *delta < tol ) {
+ alp::set( lambda, x0 );
+ return rc;
+ }
+
+ //fx0=1+sum(v**2/(d-x0))
+ Scalar< D > fx0( one );
+ rc = rc ? rc : eWiseLambda(
+ [ &d, &x0, &fx0, &ring, &minus, &divide ]( const size_t i, D &val ) {
+ Scalar< D > alpha( val );
+ Scalar< D > beta( d[ i ] );
+ alp::foldl( alpha, Scalar< D > ( val ), ring.getMultiplicativeOperator() );
+ alp::foldl( beta, x0, minus );
+ alp::foldl( alpha, beta, divide );
+ alp::foldl( fx0, alpha, ring.getAdditiveOperator() );
+ },
+ v
+ );
+
+ if( std::abs( *fx0 ) < tol ) {
+ alp::set( lambda, x0 );
+ return rc;
+ }
+
+ if( *fx0 < *zero ) {
+ rc = rc ? rc : bisec_sec_eq( lambda, d, v, x0, b, tol );
+ } else {
+ rc = rc ? rc : bisec_sec_eq( lambda, d, v, a, x0, tol );
+ }
+
+ return rc;
+ }
+
+
+ /**
+ * Calculate eigendecomposition of system D + vvt
+ * \f$D = diag(d)\f$ is a diagonal matrix and
+ * \a vvt outer product outer(v,v)
+ *
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @tparam Minus Type of minus operator used in the computation
+ * @tparam Divide Type of divide operator used in the computation
+ * @param[out] Egvecs output orthogonal matrix containing eigenvectors
+ * @param[out] egvals output vector containing eigenvalues
+ * @param[in] ring A semiring for operations
+ * @return RC SUCCESS if the execution was correct
+ *
+ */
+ template<
+ typename VectorEgVals,
+ typename VectorD,
+ typename VectorV,
+ typename OrthogonalMat,
+ typename D = typename OrthogonalMat::value_type,
+ class Ring = Semiring<
+ operators::add< D >,
+ operators::mul< D >,
+ identities::zero,
+ identities::one
+ >,
+ class Minus = operators::subtract< D >,
+ class Divide = operators::divide< D >,
+ std::enable_if_t<
+ is_vector< VectorD >::value &&
+ is_vector< VectorV >::value &&
+ is_matrix< OrthogonalMat >::value &&
+ alp::structures::is_a< typename OrthogonalMat::structure, alp::structures::Orthogonal >::value
+ > * = nullptr
+ >
+ RC eigensolveDiagPlusOuter(
+ VectorEgVals &egvals,
+ OrthogonalMat &Egvecs,
+ VectorD &d,
+ VectorV &v,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+ const Divide &divide = Divide()
+ ) {
+ RC rc = SUCCESS;
+
+ const Scalar< D > zero( ring.template getZero< D >() );
+ const Scalar< D > one( ring.template getOne< D >() );
+ const size_t n = nrows( Egvecs );
+ const double eps = 1.e-7;
+
+ // all egvec/val are trivial when the corresponding
+ // element of v is zero
+ size_t count_direct_egvc = 0;
+ size_t count_non_direct_egvc = 0;
+
+ std::vector< size_t > direct_egvc_indx( n, 0 );
+ std::vector< size_t > non_direct_egvc_indx( n, 0 );
+ // the following loop should be replaced by ALP primitives
+ // since v is not sorted it seems that another sort is needed
+ * currently there is no simple way to implement this in ALP
+ for( size_t i = 0; i < n; i++ ) {
+ if( std::abs( v[ i ] ) < eps ) {
+ // in these cases eigenvectors are canonical vectors
+ // and eigenvalues are d[i]
+ direct_egvc_indx[ count_direct_egvc ] = i ;
+ ++count_direct_egvc;
+ } else {
+ // these cases require complicated egval formula
+ // and for cases where egval is close to the singular point
+ // different algorithm for eigenvectors needs to be implemented
+ non_direct_egvc_indx[ count_non_direct_egvc ] = i;
+ ++count_non_direct_egvc;
+ }
+ }
+ direct_egvc_indx.resize( count_direct_egvc );
+ non_direct_egvc_indx.resize( count_non_direct_egvc );
+ alp::Vector< size_t > select_direct( count_direct_egvc );
+ alp::Vector< size_t > select_non_direct( count_non_direct_egvc );
+ alp::buildVector( select_direct, direct_egvc_indx.begin(), direct_egvc_indx.end() );
+ alp::buildVector( select_non_direct, non_direct_egvc_indx.begin(), non_direct_egvc_indx.end() );
+
+#ifdef DEBUG
+ std::cout << " ----> count_direct_egvc = " << count_direct_egvc << "\n";
+ std::cout << " ----> count_non_direct_egvc = " << count_non_direct_egvc << "\n";
+#endif
+ auto egvals_direct = alp::get_view< alp::structures::General >( egvals, select_direct );
+ auto egvals_non_direct = alp::get_view< alp::structures::General >( egvals, select_non_direct );
+
+ auto Egvecs_non_direct = alp::get_view< alp::structures::Orthogonal >(
+ Egvecs, select_non_direct, select_non_direct
+ );
+
+ // copy d -> egvals for direct part
+ rc = rc ? rc : alp::set(
+ egvals_direct,
+ get_view< alp::structures::General >( d, select_direct )
+ );
+
+ auto d_view = alp::get_view< alp::structures::General >( d, select_non_direct );
+ auto v_view = alp::get_view< alp::structures::General >( v, select_non_direct );
+
+#ifdef DEBUG
+ print_vector( "eigensolveDiagPlusOuter: d ", d );
+ print_vector( "eigensolveDiagPlusOuter: v ", v );
+ print_vector( "eigensolveDiagPlusOuter: d_view ", d_view );
+ print_vector( "eigensolveDiagPlusOuter: v_view ", v_view );
+#endif
+
+ // vec_b = {d_view[1], d_view[2], ... , d_view[N-1], d_view[N]+dot(v,v) }
+ size_t nn = alp::size( d_view );
+ alp::Vector< D > vec_b( nn );
+ auto v1 = alp::get_view( vec_b, utils::range( 0, nn - 1 ) );
+ auto v2 = alp::get_view( d_view, utils::range( 1, nn ) );
+ rc = rc ? rc : alp::set( v1, v2 );
+ auto v3 = alp::get_view( vec_b, utils::range( nn - 1, nn ) );
+ auto v4 = alp::get_view( d_view, utils::range( nn - 1, nn ) );
+ rc = rc ? rc : alp::set( v3, v4 );
+
+ // eWiseLambda currently does not work with select view
+ // dot() does not work with select view
+ // as a (temp) solution we use temp vectors
+ alp::Vector< D > vec_temp_egvals( nn );
+ alp::Vector< D > vec_temp_d( nn );
+ alp::Vector< D > vec_temp_v( nn );
+
+ rc = rc ? rc : alp::set( vec_temp_egvals, zero );
+ rc = rc ? rc : alp::set( vec_temp_d, d_view );
+ rc = rc ? rc : alp::set( vec_temp_v, v_view );
+
+ Scalar< D > alpha( zero );
+ // there is a bug in dot() when called on select views
+ //rc = rc ? rc : alp::dot( alpha, d_view, d_view, ring );
+ rc = rc ? rc : alp::dot( alpha, vec_temp_v, vec_temp_v, ring );
+
+ auto v5 = alp::get_view( vec_b, utils::range( alp::size( vec_b ) - 1, alp::size( vec_b ) ) );
+ rc = rc ? rc : alp::foldl( v5, alpha, ring.getAdditiveOperator() );
+
+ rc = rc ? rc : alp::eWiseLambda(
+ [ &d_view, &vec_temp_v, &vec_b, &ring, &divide ]( const size_t i, D &val ) {
+ Scalar< D > a( d_view[ i ] );
+ Scalar< D > b( vec_b[ i ] );
+ Scalar< D > w( a );
+ alp::foldl( w, b, ring.getAdditiveOperator() );
+ alp::foldl( w, Scalar< D >( 2 ), divide );
+ bisec_sec_eq( w, d_view, vec_temp_v, a, b );
+ val = *w;
+ },
+ vec_temp_egvals
+ );
+ rc = rc ? rc : alp::set( egvals_non_direct, vec_temp_egvals );
+
+ Matrix< D, structures::General, Dense > tmp_egvecs( nn, nn );
+ Matrix< D, structures::General, Dense > tmp_denominator( nn, nn );
+
+ alp::Vector< D > ones( nn );
+ rc = rc ? rc : alp::set( ones, one );
+ rc = rc ? rc : alp::set(
+ tmp_egvecs,
+ alp::outer( vec_temp_v, ones, ring.getMultiplicativeOperator() )
+ );
+
+ auto ddd = alp::outer( vec_temp_d, ones, ring.getMultiplicativeOperator() );
+ auto lll = alp::outer( ones, egvals_non_direct, ring.getMultiplicativeOperator() );
+ rc = rc ? rc : alp::set( tmp_denominator, ddd );
+ rc = rc ? rc : alp::foldl( tmp_denominator, lll, minus );
+ rc = rc ? rc : alp::foldl( tmp_egvecs, tmp_denominator, divide );
+
+ // while fold matrix -> vector would be a solution to
+ // normalize columns in tmp_egvecs,
+ // here we abuse the syntax and use eWiseLambda.
+ // Once fold matrix -> vector implemented, the next section should be rewritten
+ rc = rc ? rc : alp::eWiseLambda(
+ [ &tmp_egvecs, &nn, &ring, &divide, &zero ]( const size_t i, D &val ) {
+ (void) val;
+ auto egvec_i = get_view( tmp_egvecs, utils::range( 0, nn ), i );
+ Scalar< D > norm_i( zero );
+ alp::norm2( norm_i, egvec_i, ring );
+ alp::foldl( egvec_i, norm_i , divide );
+ },
+ ones
+ );
+
+ // update results
+ auto egvecs_view = alp::get_view( Egvecs_non_direct, utils::range( 0, nn ), utils::range( 0, nn ) );
+ auto tmp_egvecs_orth_view = alp::get_view< typename OrthogonalMat::structure >( tmp_egvecs );
+ rc = rc ? rc : alp::set( egvecs_view, tmp_egvecs_orth_view );
+
+ return rc;
+ }
+
+
+ /**
+ * Calculate eigendecomposition of symmetric tridiagonal matrix T
+ * \f$T = Qdiag(d)Q^T\f$ where
+ * \a T is real symmetric tridiagonal
+ * \a Q is orthogonal (columns are eigenvectors).
+ * \a d is vector containing eigenvalues.
+ *
+ * @tparam D Data element type
+ * @tparam Ring Type of the semiring used in the computation
+ * @tparam Minus Type of minus operator used in the computation
+ * @tparam Divide Type of divide operator used in the computation
+ * @param[out] Q output orthogonal matrix containing eigenvectors
+ * @param[out] d output vector containing eigenvalues
+ * @param[in] T input symmetric tridiagonal matrix
+ * @param[in] ring A semiring for operations
+ * @return RC SUCCESS if the execution was correct
+ *
+ */
+ template<
+ typename D,
+ typename SymmOrHermTridiagonalType,
+ typename OrthogonalType,
+ typename SymmHermTrdiViewType,
+ typename OrthViewType,
+ typename SymmHermTrdiImfR,
+ typename SymmHermTrdiImfC,
+ typename OrthViewImfR,
+ typename OrthViewImfC,
+ typename VecViewType,
+ typename VecImfR,
+ typename VecImfC,
+ class Ring = Semiring< operators::add< D >, operators::mul< D >, identities::zero, identities::one >,
+ class Minus = operators::subtract< D >,
+ class Divide = operators::divide< D >
+ >
+ RC symm_tridiag_dac_eigensolver(
+ Matrix<
+ D,
+ SymmOrHermTridiagonalType,
+ Dense,
+ SymmHermTrdiViewType,
+ SymmHermTrdiImfR,
+ SymmHermTrdiImfC
+ > &T,
+ Matrix<
+ D,
+ OrthogonalType,
+ Dense,
+ OrthViewType,
+ OrthViewImfR,
+ OrthViewImfC
+ > &Q,
+ Vector<
+ D,
+ structures::General,
+ Dense,
+ VecViewType,
+ VecImfR,
+ VecImfC
+ > &d,
+ const Ring &ring = Ring(),
+ const Minus &minus = Minus(),
+ const Divide &divide = Divide()
+ ) {
+ (void) ring;
+ (void) minus;
+ (void) divide;
+
+ const Scalar< D > zero( ring.template getZero< D >() );
+ const Scalar< D > one( ring.template getOne< D >() );
+
+ RC rc = SUCCESS;
+
+ const size_t n = nrows( T );
+ const size_t m = n / 2;
+
+ if( n == 1 ) {
+ //d=T[0];
+ rc = rc ? rc : alp::eWiseLambda(
+ [ &d ]( const size_t i, const size_t j, D &val ) {
+ (void) i;
+ (void) j;
+ alp::set( d, Scalar< D > ( val ) );
+ },
+ T
+ );
+ // Q=[[1]]; a 1x1 matrix
+ rc = rc ? rc : alp::set( Q, one );
+
+ return rc;
+ }
+
+
+ Vector< D, structures::General, Dense > v( n );
+ rc = rc ? rc : alp::set( v, zero );
+
+ auto v1 = alp::get_view( T, utils::range( m - 1, m ), m );
+ auto v2 = alp::get_view( v, utils::range( m , m + 1 ) );
+ rc = rc ? rc : alp::set( v2, v1 );
+
+ auto v3 = alp::get_view( v, utils::range( m - 1 , m ) );
+ rc = rc ? rc : alp::set( v3, one );
+
+#ifdef DEBUG
+ print_vector( " v = ", v );
+#endif
+ Matrix< D, SymmOrHermTridiagonalType, Dense > Atmp( n );
+ rc = rc ? rc : alp::set( Atmp, T );
+ auto vvt = alp::outer( v, ring.getMultiplicativeOperator() ) ;
+
+#ifdef DEBUG
+ print_matrix( " vvt = ", vvt );
+#endif
+ rc = rc ? rc : alp::foldl( Atmp, vvt, minus );
+
+#ifdef DEBUG
+ print_matrix( " Atmp(updated) ", Atmp );
+#endif
+
+ auto Ttop = alp::get_view< SymmOrHermTridiagonalType >( Atmp, utils::range( 0, m ), utils::range( 0, m ) );
+ auto Tdown = alp::get_view< SymmOrHermTridiagonalType >( Atmp, utils::range( m, n ), utils::range( m, n ) );
+
+#ifdef DEBUG
+ print_matrix( " Ttop = ", Ttop );
+ print_matrix( " Tdown = ", Tdown );
+#endif
+
+ Vector< D, structures::General, Dense > dtmp( n );
+ rc = rc ? rc : alp::set( dtmp, zero );
+ auto dtop = alp::get_view( dtmp, utils::range( 0, m ) );
+ auto ddown = alp::get_view( dtmp, utils::range( m, n ) );
+
+ Matrix< D, OrthogonalType, Dense > U( n );
+ rc = rc ? rc : alp::set( U, zero );
+
+ auto Utop = alp::get_view< OrthogonalType >( U, utils::range( 0, m ), utils::range( 0, m ) );
+ auto Udown = alp::get_view< OrthogonalType >( U, utils::range( m, n ), utils::range( m, n ) );
+
+ rc = rc ? rc : symm_tridiag_dac_eigensolver( Ttop, Utop, dtop, ring );
+ rc = rc ? rc : symm_tridiag_dac_eigensolver( Tdown, Udown, ddown, ring );
+ //std::cout << " --> ust one iteration\n";
+
+#ifdef DEBUG
+ std::cout << " after symm_tridiag_dac_eigensolver call:\n";
+ print_matrix( " Utop = ", Utop );
+ print_matrix( " Udown = ", Udown );
+ print_matrix( " U = ", U );
+#endif
+
+ Vector< D, structures::General, Dense > z( n );
+ rc = rc ? rc : alp::set( z, zero );
+
+#ifdef DEBUG
+ print_vector( " v ", v );
+ print_vector( " z ", z );
+#endif
+
+#ifdef TEMPDISABLE
+ // while mxv does not support vectors/view
+ // we cast vector->matrix and use mxm
+ auto z_mat_view = alp::get_view< view::matrix >( z );
+ auto v_mat_view = alp::get_view< view::matrix >( v );
+ rc = rc ? rc : alp::mxm(
+ z_mat_view,
+ alp::get_view< alp::view::transpose >( U ),
+ v_mat_view,
+ ring
+ );
+#else
+ //z=U^T.dot(v)
+ rc = rc ? rc : alp::mxv(
+ z,
+ alp::get_view< alp::view::transpose >( U ),
+ v,
+ ring
+ );
+#endif
+
+#ifdef DEBUG
+ print_vector( " d ", dtmp );
+ print_vector( " z ", z );
+#endif
+
+ // permutation that sorts dtmp
+ alp::Vector< size_t > permutation_vec( n );
+ rc = rc ? rc : alp::sort( permutation_vec, dtmp, alp::relations::lt< D >() );
+
+ alp::Vector< size_t > no_permutation_vec( n );
+ rc = rc ? rc : alp::set< alp::descriptors::use_index >( no_permutation_vec, alp::Scalar< size_t >( 0 ) );
+
+ auto dtmp2 = alp::get_view< alp::structures::General >(
+ dtmp,
+ permutation_vec
+ );
+ auto ztmp2 = alp::get_view< alp::structures::General >(
+ z,
+ permutation_vec
+ );
+#ifdef DEBUG
+ print_vector( " dtmp2 ", dtmp2 );
+ print_vector( " ztmp2 ", ztmp2 );
+#endif
+
+
+ rc = rc ? rc : alp::set( d, zero );
+ Matrix< D, OrthogonalType, Dense > QdOuter( n );
+ rc = rc ? rc : alp::set( QdOuter, zero );
+ auto QdOuter_diag = alp::get_view< alp::view::diagonal >( QdOuter );
+ rc = rc ? rc : alp::set(
+ QdOuter_diag,
+ one
+ );
+
+ auto QdOuter2 = alp::get_view< alp::structures::Orthogonal >(
+ QdOuter, permutation_vec, no_permutation_vec
+ );
+
+ rc = rc ? rc : eigensolveDiagPlusOuter( d, QdOuter2, dtmp2, ztmp2 );
+#ifdef DEBUG
+ print_vector( " d(out) ", d );
+ print_matrix( " QdOuter(out) ", QdOuter );
+ print_matrix( " U ", U );
+#endif
+
+ rc = rc ? rc : alp::set( Q, zero );
+ rc = rc ? rc : alp::mxm( Q, U, QdOuter, ring );
+
+#ifdef DEBUG
+ print_matrix( " Q = U x Q ", Q );
+#endif
+
+ return rc;
+ }
+ } // namespace algorithms
+} // namespace alp
diff --git a/include/alp/amf-based/functorbasedmatrix.hpp b/include/alp/amf-based/functorbasedmatrix.hpp
new file mode 100644
index 000000000..0e56a8688
--- /dev/null
+++ b/include/alp/amf-based/functorbasedmatrix.hpp
@@ -0,0 +1,151 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _H_ALP_AMF_BASED_FUNCTORBASEDMATRIX
+#define _H_ALP_AMF_BASED_FUNCTORBASEDMATRIX
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+
+namespace alp {
+
+ namespace internal {
+
+ /** Forward declaration */
+ template< typename DerivedMatrix >
+ class MatrixBase;
+
+ /** Forward declaration */
+ template< typename T, typename ImfR, typename ImfC, typename DataLambdaType >
+ class FunctorBasedMatrix;
+
+ /** Functor reference getter used by friend functions of specialized Matrix */
+ template< typename T, typename ImfR, typename ImfC, typename DataLambdaType >
+ const typename FunctorBasedMatrix< T, ImfR, ImfC, DataLambdaType >::functor_type &getFunctor( const FunctorBasedMatrix< T, ImfR, ImfC, DataLambdaType > &A );
+
+ /**
+ * Getter for the functor of a functor-based matrix.
+ *
+ * @tparam MatrixType The type of input matrix.
+ *
+ * @param[in] A Input matrix.
+ *
+ * @returns A constant reference to a functor object within the
+ * provided functor-based matrix.
+ */
+ template<
+ typename MatrixType,
+ std::enable_if_t<
+ internal::is_functor_based< MatrixType >::value
+ > * = nullptr
+ >
+ const typename MatrixType::functor_type &getFunctor( const MatrixType &A ) {
+ return static_cast< const typename MatrixType::base_type & >( A ).getFunctor();
+ }
+
+ /**
+ * Specialization of MatrixReference with a lambda function as a target.
+ * Used as a result of low-rank operation to avoid the need for allocating a container.
+ * The data is produced lazily by invoking the lambda function stored as a part of this object.
+ *
+ * \note Views-over-lambda-functions types are used internally as results of low-rank operations and are not
+ * directly exposed to users. From the users perspective, the use of objects of this type does not differ
+ * from the use of other \a alp::Matrix types. The difference lies in a lazy implementation of the access
+ * to matrix elements, which is not exposed to the user.
+ *
+ */
+ template< typename T, typename ImfR, typename ImfC, typename DataLambdaType >
+ class FunctorBasedMatrix : public MatrixBase< FunctorBasedMatrix< T, ImfR, ImfC, DataLambdaType > > {
+ public:
+
+ /** Expose static properties */
+ typedef T value_type;
+ /** Type returned by access function */
+ typedef T access_type;
+ typedef T const_access_type;
+ /** Type of the index used to access the physical storage */
+ typedef std::pair< size_t, size_t > storage_index_type;
+
+ protected:
+
+ typedef FunctorBasedMatrix< T, ImfR, ImfC, DataLambdaType > self_type;
+ friend MatrixBase< self_type >;
+
+ typedef std::function< bool() > initialized_functor_type;
+ const initialized_functor_type initialized_lambda;
+
+ const ImfR imf_r;
+ const ImfC imf_c;
+
+ const DataLambdaType data_lambda;
+
+ std::pair< size_t, size_t > dims() const noexcept {
+ return std::make_pair( imf_r.n, imf_c.n );
+ }
+
+ const DataLambdaType &getFunctor() const noexcept {
+ return data_lambda;
+ }
+
+ bool getInitialized() const noexcept {
+ return initialized_lambda();
+ }
+
+ void setInitialized( const bool ) noexcept {
+ static_assert( "Calling setInitialized on a FunctorBasedMatrix is not allowed." );
+ }
+
+ access_type access( const storage_index_type &storage_index ) const {
+ T result = 0;
+ data_lambda( result, imf_r.map( storage_index.first ), imf_c.map( storage_index.second ) );
+ return static_cast< access_type >( result );
+ }
+
+ storage_index_type getStorageIndex( const size_t i, const size_t j, const size_t s, const size_t P ) const {
+ (void)s;
+ (void)P;
+ return std::make_pair( i, j );
+ }
+
+ public:
+
+ FunctorBasedMatrix(
+ initialized_functor_type initialized_lambda,
+ ImfR imf_r,
+ ImfC imf_c,
+ const DataLambdaType data_lambda
+ ) :
+ initialized_lambda( initialized_lambda ),
+ imf_r( imf_r ),
+ imf_c( imf_c ),
+ data_lambda( data_lambda ) {}
+
+ }; // class FunctorBasedMatrix
+
+ } // namespace internal
+
+} // namespace alp
+
+#endif // end ``_H_ALP_AMF_BASED_FUNCTORBASEDMATRIX''
diff --git a/include/alp/amf-based/matrix.hpp b/include/alp/amf-based/matrix.hpp
new file mode 100644
index 000000000..c5c94df32
--- /dev/null
+++ b/include/alp/amf-based/matrix.hpp
@@ -0,0 +1,1369 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 14th of January 2022
+ */
+
+#ifndef _H_ALP_AMF_BASED_MATRIX
+#define _H_ALP_AMF_BASED_MATRIX
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "functorbasedmatrix.hpp"
+#include "storage.hpp"
+#include "storagebasedmatrix.hpp"
+#include "vector.hpp"
+
+
+namespace alp {
+
+ /** Identifies any backend's implementation of ALP matrix as an ALP matrix. */
+ template<
+ typename T, typename Structure, enum Density density,
+ typename View, typename ImfR, typename ImfC, enum Backend backend
+ >
+ struct is_matrix< Matrix< T, Structure, density, View, ImfR, ImfC, backend > > : std::true_type {};
+
+ // Matrix-related implementation
+
+ namespace internal {
+
+ /** Forward declaration */
+ template< typename DerivedMatrix >
+ class MatrixBase;
+
+ template< typename DerivedMatrix >
+ std::pair< size_t, size_t > dims( const MatrixBase< DerivedMatrix > &A ) noexcept;
+
+ template<
+ typename MatrixType,
+ std::enable_if_t< internal::is_storage_based< MatrixType >::value > *
+ >
+ size_t getStorageDimensions( const MatrixType &A ) noexcept;
+
+ template< typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType>::value > *
+ >
+ bool getInitialized( const MatrixType &A ) noexcept;
+
+ template< typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType>::value > *
+ >
+ void setInitialized( MatrixType &, const bool ) noexcept;
+
+ /** Forward declarations for access functions */
+ template<
+ typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType >::value > * = nullptr
+ >
+ typename MatrixType::const_access_type access( const MatrixType &, const typename MatrixType::storage_index_type & );
+
+ template<
+ typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType >::value > * = nullptr
+ >
+ typename MatrixType::access_type access( MatrixType &, const typename MatrixType::storage_index_type & );
+
+ template<
+ typename MatrixType,
+ std::enable_if< is_matrix< MatrixType >::value > * = nullptr
+ >
+ typename MatrixType::storage_index_type getStorageIndex( const MatrixType &A, const size_t i, const size_t j, const size_t s = 0, const size_t P = 1 );
+
+ template< typename DerivedMatrix >
+ std::pair< size_t, size_t > dims( const MatrixBase< DerivedMatrix > & A ) noexcept;
+
+ template<
+ typename MatrixType,
+ std::enable_if_t< internal::is_storage_based< MatrixType >::value > *
+ >
+ size_t getStorageDimensions( const MatrixType &A ) noexcept;
+
+ template< typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType>::value > *
+ >
+ bool getInitialized( const MatrixType &A ) noexcept;
+
+ template< typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType>::value > *
+ >
+ void setInitialized( MatrixType &, const bool ) noexcept;
+ /**
+ * Base Matrix class containing attributes common to all Matrix specialization
+ */
+ template< typename DerivedMatrix >
+ class MatrixBase {
+
+ friend std::pair< size_t, size_t > dims<>( const MatrixBase< DerivedMatrix > &A ) noexcept;
+
+ template< typename MatrixType, std::enable_if_t< is_matrix< MatrixType>::value > * >
+ friend bool getInitialized( const MatrixType &A ) noexcept;
+
+ template< typename MatrixType, std::enable_if_t< is_matrix< MatrixType>::value > * >
+ friend void setInitialized( MatrixType &A, const bool initialized ) noexcept;
+
+ protected:
+
+ std::pair< size_t, size_t > dims() const noexcept {
+ return static_cast< const DerivedMatrix & >( *this ).dims();
+ }
+
+ template<
+ typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType >::value > *
+ >
+ friend typename MatrixType::const_access_type access( const MatrixType &A, const typename MatrixType::storage_index_type &storageIndex );
+
+ template<
+ typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType >::value > *
+ >
+ friend typename MatrixType::access_type access( MatrixType &A, const typename MatrixType::storage_index_type &storageIndex );
+
+ template<
+ typename MatrixType,
+ std::enable_if< is_matrix< MatrixType >::value > *
+ >
+ friend typename MatrixType::storage_index_type getStorageIndex( const MatrixType &A, const size_t i, const size_t j, const size_t s, const size_t P );
+
+ bool getInitialized() const {
+ return static_cast< const DerivedMatrix & >( *this ).getInitialized();
+ }
+
+ void setInitialized( const bool initialized ) {
+ static_cast< DerivedMatrix & >( *this ).setInitialized( initialized );
+ }
+
+ template< typename ConstAccessType, typename StorageIndexType >
+ ConstAccessType access( const StorageIndexType storageIndex ) const {
+ static_assert( std::is_same< ConstAccessType, typename DerivedMatrix::const_access_type >::value );
+ static_assert( std::is_same< StorageIndexType, typename DerivedMatrix::storage_index_type >::value );
+ return static_cast< const DerivedMatrix & >( *this ).access( storageIndex );
+ }
+
+ template< typename AccessType, typename StorageIndexType >
+ AccessType access( const StorageIndexType &storageIndex ) {
+ static_assert( std::is_same< AccessType, typename DerivedMatrix::access_type >::value );
+ static_assert( std::is_same< StorageIndexType, typename DerivedMatrix::storage_index_type >::value );
+ return static_cast< DerivedMatrix & >( *this ).access( storageIndex );
+ }
+
+ template< typename StorageIndexType >
+ StorageIndexType getStorageIndex( const size_t i, const size_t j, const size_t s, const size_t P ) const {
+ static_assert( std::is_same< StorageIndexType, typename DerivedMatrix::storage_index_type >::value );
+ return static_cast< const DerivedMatrix & >( *this ).getStorageIndex( i, j, s, P );
+ }
+
+ };
+
+ template<
+ typename T,
+ typename Structure,
+ enum Density density,
+ typename View,
+ typename ImfR,
+ typename ImfC,
+ enum Backend backend
+ >
+ struct matrix_base_class {
+ typedef typename std::conditional<
+ internal::is_view_over_functor< View >::value,
+ internal::FunctorBasedMatrix< T, ImfR, ImfC, typename View::applied_to >,
+ internal::StorageBasedMatrix< T,
+ typename internal::determine_amf_type< Structure, View, ImfR, ImfC, backend >::type,
+ internal::requires_allocation< View >::value,
+ backend
+ >
+ >::type type;
+ };
+
+ } // namespace internal
+
+ /**
+ * \brief An ALP structured matrix.
+ *
+ * This is an opaque data type for structured matrices.
+ *
+ * A structured matrix exposes a mathematical
+ * \em logical layout which allows to express implementation-oblivious concepts
+ * including the matrix structure itself and \em views on the matrix.
+ * The logical layout of a structured matrix maps to a physical counterpart via
+ * a storage scheme which typically depends on the chosen structure and the selected
+ * backend. alp::Matrix and alp::Vector may be used as interfaces to such a physical
+ * layout.
+ * To visualize this, you may think of a band matrix. Using a
+ * full dense or a banded storage schemes would require
+ * the use of a \a alp::Matrix container (see include/alp/density.hpp for
+ * more details about the supported storage schemes). However, the interpretation of its
+ * content would differ in the two cases being a function of both the Structure
+ * information and the storage scheme combined.
+ *
+ * Views can be used to create logical \em perspectives on top of a container.
+ * For example, one may decide to refer to the transpose of a matrix or to treat
+ * for a limited part of a program a square matrix as symmetric.
+ * If a view can be expressed as a concept \em invariant of specific runtime features,
+ * such views can be defined statically (for example, one may always refer to the
+ * transpose or the diagonal of a matrix irrespective of features such as the matrix's
+ * size). Others may depend on features such as the size of a matrix
+ * (e.g., gathering/scattering the rows/columns of a matrix or permuting them).
+ *
+ * Structured matrices defined as views on other matrices do not instantiate a
+ * new container but refer to the one used by their targets.
+ * See the two specializations
+ * \a Matrix
+ * and \a Matrix, backend >
+ * as examples of structured matrix types without and with physical container, respectively.
+ *
+ *
+ * @tparam T The type of the matrix elements. \a T shall not be a GraphBLAS
+ * type.
+ * @tparam Structure One of the matrix structures defined in \a alp::structures.
+ * @tparam density Either \em enum \a Density::Dense or \em enum
+ * \a storage::Sparse.
+ * @tparam View One of the matrix views in \a alp::view.
+ * All static views except for \a view::Original (via
+ * \a view::Original cannot instantiate a new container
+ * and only allow to refer to a previously defined
+ * \a Matrix.
+ * The \a View parameter should not be used directly
+ * by the user but selected via \a get_view function.
+ *
+ * See examples of Matrix definitions within \a include/alp/reference/matrix.hpp
+ * and the \a dense_structured_matrix.cpp unit test.
+ *
+ */
+ template< typename T, typename Structure, enum Density density, typename View, typename ImfR, typename ImfC, enum Backend backend >
+ class Matrix :
+ public internal::matrix_base_class< T, Structure, Density::Dense, View, ImfR, ImfC, backend >::type {
+
+ protected:
+ typedef Matrix< T, Structure, Density::Dense, View, ImfR, ImfC, backend > self_type;
+
+ /*********************
+ Storage info friends
+ ******************** */
+
+ template< typename fwd_iterator >
+ friend RC buildMatrix( Matrix< T, Structure, Density::Dense, View, ImfR, ImfC, backend > &A,
+ const fwd_iterator & start, const fwd_iterator & end );
+
+ template< typename fwd_iterator >
+ RC buildMatrixUnique( const fwd_iterator &start, const fwd_iterator &end ) {
+ std::cout << "Building Matrix<>; calling buildMatrix( Matrix<> )\n";
+ return buildMatrix( *(this->_container), start, end );
+ }
+
+ public:
+ /** Exposes the types and the static properties. */
+ typedef Structure structure;
+ /**
+ * Indicates if a matrix needs to allocate data-related memory
+ * (for the internal container or functor object).
+ * False if it is a view over another matrix or a functor.
+ */
+ static constexpr bool requires_allocation = internal::requires_allocation< View >::value;
+
+ /**
+ * Expose the base type class to enable internal functions to cast
+ * the type of objects of this class to the base class type.
+ */
+ typedef typename internal::matrix_base_class< T, Structure, Density::Dense, View, ImfR, ImfC, backend >::type base_type;
+
+ template < view::Views view_tag, bool d = false >
+ struct view_type;
+
+ template < bool d >
+ struct view_type< view::original, d > {
+ using type = Matrix< T, Structure, Density::Dense, view::Original< self_type >, imf::Id, imf::Id, backend >;
+ };
+
+ template < bool d >
+ struct view_type< view::gather, d > {
+ using type = Matrix<
+ T,
+ typename structures::apply_view< view::gather, Structure >::type,
+ Density::Dense, view::Gather< self_type >, imf::Strided, imf::Strided, backend
+ >;
+ };
+
+ template < bool d >
+ struct view_type< view::transpose, d > {
+ using type = Matrix<
+ T,
+ typename structures::apply_view< view::transpose, Structure >::type,
+ Density::Dense, view::Transpose< self_type >, imf::Id, imf::Id, backend
+ >;
+ };
+
+ template < bool d >
+ struct view_type< view::diagonal, d > {
+ using type = Vector< T, structures::General, Density::Dense, view::Diagonal< self_type >, imf::Id, imf::Zero, backend >;
+ };
+
+ /**
+ * Constructor for a storage-based matrix that allocates storage.
+ * Specialization for a matrix with not necessarily equal row and column dimensions.
+ */
+ template<
+ typename ThisStructure = Structure,
+ std::enable_if_t<
+ internal::is_view_over_storage< View >::value &&
+ internal::requires_allocation< View >::value &&
+ !structures::is_in< structures::Square, typename ThisStructure::inferred_structures >::value
+ > * = nullptr
+ >
+ Matrix( const size_t rows, const size_t cols, const size_t cap = 0 ) :
+ base_type(
+ storage::AMFFactory< backend >::template FromPolynomial<
+ Structure, ImfR, ImfC
+ >::Create(
+ ImfR( rows ),
+ ImfC( cols )
+ )
+ ) {
+
+ (void) cap;
+
+ // This check should be performed in the class body rather than here.
+ // Allocation-requiring matrix with incompatible IMFs should not be instantiable at all.
+ // Here it is only forbidden to invoke this constructor for such a matrix.
+ static_assert(
+ std::is_same< ImfR, imf::Id >::value &&
+ ( std::is_same< ImfC, imf::Id >::value || std::is_same< ImfC, imf::Zero >::value ),
+ "This constructor can only be used with a matrix having Id IMFs."
+ );
+
+ }
+
+ /*
+ * Constructor for a storage-based matrix that allocates storage.
+ * Specialization for matrices with equal row and column dimensions.
+ */
+ template<
+ typename ThisStructure = Structure,
+ std::enable_if_t<
+ internal::is_view_over_storage< View >::value &&
+ internal::requires_allocation< View >::value &&
+ structures::is_in< structures::Square, typename ThisStructure::inferred_structures >::value
+ > * = nullptr
+ >
+ Matrix( const size_t dim, const size_t cap = 0 ) :
+ base_type(
+ storage::AMFFactory< backend >::template FromPolynomial<
+ Structure, ImfR, ImfC
+ >::Create(
+ ImfR( dim ),
+ ImfC( dim )
+ )
+ ) {
+
+ (void) cap;
+
+ static_assert(
+ std::is_same< ImfR, imf::Id >::value &&
+ ( std::is_same< ImfC, imf::Id >::value || std::is_same< ImfC, imf::Zero >::value ),
+ "This constructor can only be used with a matrix having Id IMFs."
+ );
+
+ }
+
+ /**
+ * Constructor for a view over another storage-based matrix.
+ *
+ * @tparam SourceType The type of the target matrix.
+ *
+ */
+ template<
+ typename SourceType,
+ std::enable_if_t<
+ std::is_same< SourceType, typename View::applied_to >::value &&
+ internal::is_view_over_storage< View >::value &&
+ !internal::requires_allocation< View >::value
+ > * = nullptr
+ >
+ Matrix( SourceType &source_matrix, ImfR imf_r, ImfC imf_c ) :
+ base_type(
+ getContainer( source_matrix ),
+ storage::AMFFactory< backend >::template Compose<
+ ImfR, ImfC, typename SourceType::base_type::amf_type
+ >::Create( imf_r, imf_c, internal::getAmf( source_matrix ) )
+ ) {}
+
+ /**
+ * Constructor for a view over another matrix applying a view defined
+ * by View template parameter of the constructed matrix.
+ *
+ * @tparam SourceType The type of the target matrix.
+ *
+ */
+ template<
+ typename SourceType,
+ std::enable_if_t<
+ std::is_same< SourceType, typename View::applied_to >::value &&
+ internal::is_view_over_storage< View >::value &&
+ !internal::requires_allocation< View >::value
+ > * = nullptr
+ >
+ Matrix( SourceType &source_matrix ) :
+ base_type(
+ getContainer( source_matrix ),
+ storage::AMFFactory< backend >::template Reshape< View::type_id, typename SourceType::amf_type >::Create( internal::getAmf( source_matrix ) )
+ ) {}
+
+ /**
+ * Constructor for a view over an internal container of another matrix.
+ *
+ * @tparam SourceType The type of the target matrix.
+ * @tparam AmfType The type of the amf corresponding to the layout of
+ * the provided container.
+ * Used as a template parameter to avoid hard
+ * compilation error in the case of FunctorBasedMatrix,
+ * when base_type::amf_type does not exist.
+ */
+ template<
+ typename BufferType,
+ typename AmfType,
+ std::enable_if_t<
+ !is_container< BufferType >::value &&
+ internal::is_view_over_storage< View >::value
+ > * = nullptr
+ >
+ Matrix( BufferType &&buffer, const size_t buffer_size, AmfType &&amf ) :
+ base_type(
+ buffer,
+ buffer_size,
+ std::forward< typename base_type::amf_type >( amf )
+ ) {
+ static_assert(
+ std::is_same< typename base_type::amf_type, typename std::remove_reference< AmfType >::type >::value,
+ "The type of the provided AMF does not match the type of constructed container's AMF"
+ );
+ }
+
+ /**
+ * Constructor for a view over another matrix' internal container.
+ *
+ * @tparam ContainerType The type of the internal container.
+ * @tparam AmfType The type of the amf used to construct the matrix.
+ * Used as a template parameter to benefit from
+ * SFINAE for the case of FunctorBasedMatrix, when
+ * base_type::amf_type does not exist and, therefore,
+ * using the expression base_type::amf_type would
+ * result in a hard compilation error.
+ */
+ template<
+ typename ContainerType,
+ typename AmfType,
+ std::enable_if_t<
+ internal::is_container< ContainerType >::value &&
+ internal::is_view_over_storage< View >::value &&
+ !internal::requires_allocation< View >::value
+ > * = nullptr
+ >
+ Matrix( ContainerType &container, AmfType &&amf ) :
+ base_type(
+ container,
+ std::forward< typename base_type::amf_type >( amf )
+ ) {
+ static_assert(
+ std::is_same< typename base_type::amf_type, typename std::remove_reference< AmfType >::type >::value,
+ "The AMF type of the constructor parameter needs to match the AMF type of this container specialization."
+ );
+ }
+
+ /**
+ * Constructor for a functor-based matrix that allocates memory.
+ * Specialization for a matrix with not necessarily equal row and column dimensions.
+ *
+ * @tparam LambdaType The type of the lambda function associated with the data.
+ *
+ */
+ template<
+ typename LambdaType,
+ std::enable_if_t<
+ std::is_same< LambdaType, typename View::applied_to >::value &&
+ internal::is_view_over_functor< View >::value &&
+ internal::requires_allocation< View >::value &&
+ !structures::is_in< structures::Square, typename Structure::inferred_structures >::value
+ > * = nullptr
+ >
+ Matrix( std::function< bool() > initialized, const size_t rows, const size_t cols, LambdaType lambda ) :
+ base_type( initialized, imf::Id( rows ), imf::Id( cols ), lambda ) {
+
+ static_assert(
+ std::is_same< ImfR, imf::Id >::value &&
+ std::is_same< ImfC, imf::Id >::value,
+ "This constructor can only be used with Id IMFs."
+ );
+
+ }
+
+ /**
+ * Constructor for a functor-based matrix that allocates memory.
+ * Specialization for a matrix with equal row and column dimensions.
+ *
+ * @tparam LambdaType The type lambda function associated to the data.
+ *
+ */
+ template<
+ typename LambdaType,
+ std::enable_if_t<
+ std::is_same< LambdaType, typename View::applied_to >::value &&
+ internal::is_view_over_functor< View >::value &&
+ internal::requires_allocation< View >::value &&
+ structures::is_in< structures::Square, typename Structure::inferred_structures >::value
+ > * = nullptr
+ >
+ Matrix( std::function< bool() > initialized, const size_t dim, LambdaType lambda ) :
+ base_type( initialized, imf::Id( dim ), imf::Id( dim ), lambda ) {
+
+ static_assert(
+ std::is_same< ImfR, imf::Id >::value &&
+ std::is_same< ImfC, imf::Id >::value,
+ "This constructor can only be used with Id IMFs."
+ );
+
+ }
+
+ /**
+ * Constructor for a view over another functor-based matrix.
+ *
+ * @tparam SourceType The type of the target matrix.
+ *
+ */
+ template<
+ typename SourceType,
+ std::enable_if_t<
+ std::is_same< SourceType, typename View::applied_to >::value &&
+ internal::is_view_over_functor< View >::value &&
+ !internal::requires_allocation< View >::value
+ > * = nullptr
+ >
+ Matrix( SourceType &source_matrix, ImfR imf_r, ImfC imf_c ) :
+ base_type( getFunctor( source_matrix ), imf_r, imf_c ) {}
+
+ /**
+ * @deprecated
+ * Constructor for a view over another functor-based matrix.
+ *
+ * @tparam SourceType The type of the target matrix.
+ *
+ */
+ template<
+ typename SourceType,
+ std::enable_if_t<
+ std::is_same< SourceType, typename View::applied_to >::value &&
+ internal::is_view_over_functor< View >::value &&
+ !internal::requires_allocation< View >::value
+ > * = nullptr
+ >
+ Matrix( SourceType &source_matrix ) :
+ Matrix( getFunctor( source_matrix ),
+ imf::Id( nrows ( source_matrix ) ),
+ imf::Id( ncols ( source_matrix ) )
+ ) {
+
+ static_assert(
+ std::is_same< ImfR, imf::Id >::value &&
+ std::is_same< ImfC, imf::Id >::value,
+ "This constructor can only be used with Id IMFs."
+ );
+
+ }
+ }; // ALP Matrix
+
+ namespace structures {
+
+ /**
+ * Calculates the iteration space for row-dimension for the given matrix and band index.
+ *
+ * @tparam MatrixType The type of ALP matrix
+ * @tparam band_index The index of the desired matrix band
+ *
+ * @param[in] A ALP matrix
+ *
+ * @returns a pair of size_t values,
+ * the first representing lower and the second upper limit.
+ *
+ * \note Each backend shall specialize this function as its implementation
+ * depends on the way backend handles storage of different structures.
+ */
+ template<
+ size_t band_index, typename MatrixType,
+ std::enable_if_t<
+ is_matrix< MatrixType >::value
+ > * = nullptr
+ >
+ std::pair< size_t, size_t > calculate_row_coordinate_limits( const MatrixType &A );
+
+ /**
+ * Calculates the iteration space for column-dimension for the given matrix, band index and row index.
+ *
+ * @tparam MatrixType The type of ALP matrix
+ * @tparam band_index The index of the desired matrix band
+ *
+ * @param[in] A ALP matrix
+ * @param[in] row Row index
+ *
+ * @returns a pair of size_t values,
+ * the first representing lower and the second upper limit.
+ *
+ * \note Each backend shall specialize this function as its implementation
+ * depends on the way backend handles storage of different structures.
+ */
+ template<
+ size_t band_index, typename MatrixType,
+ std::enable_if_t<
+ is_matrix< MatrixType >::value
+ > * = nullptr
+ >
+ std::pair< size_t, size_t > calculate_column_coordinate_limits( const MatrixType &A, const size_t row );
+
+ } // namespace structures
+
+ /**
+ *
+ * @brief Generate a view specified by \a target_view where the type is compliant with the
+ * \a source matrix.
+ * The function guarantees the created view is non-overlapping with other
+ * existing views only when the check can be performed in constant time.
+ *
+ * @tparam target_view One of the supported views listed in \a view::Views
+ * @tparam SourceMatrix The type of the source matrix
+ *
+ * @param source The source ALP matrix
+ *
+ * @return A new \a target_view view over the source matrix.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This function performs
+ * \f$ \Theta(nref) \f$ amount of work where \f$ nref \f$ is the number
+ * of available views of \a source.
+ * -# A call to this function may use \f$ \mathcal{O}(1) \f$ bytes
+ * of memory beyond the memory in use at the function call entry.
+ * -# This function may make system calls.
+ * \endparblock
+ *
+ */
+ template<
+ enum view::Views target_view = view::original,
+ typename SourceMatrix,
+ std::enable_if_t<
+ is_matrix< SourceMatrix >::value &&
+ target_view != view::diagonal
+ > * = nullptr
+ >
+ typename SourceMatrix::template view_type< target_view >::type
+ get_view( SourceMatrix &source ) {
+
+ using target_strmat_t = typename SourceMatrix::template view_type< target_view >::type;
+
+ return target_strmat_t( source );
+ }
+
+ /** Specialization for diagonal view over Square matrix */
+ template<
+ enum view::Views target_view = view::original,
+ typename SourceMatrix,
+ std::enable_if_t<
+ is_matrix< SourceMatrix >::value &&
+ target_view == view::diagonal &&
+ structures::is_in< structures::Square, typename SourceMatrix::structure::inferred_structures >::value
+ > * = nullptr
+ >
+ typename SourceMatrix::template view_type< view::diagonal >::type
+ get_view( SourceMatrix &source ) {
+
+ using target_t = typename SourceMatrix::template view_type< view::diagonal >::type;
+ return target_t( source );
+ }
+
+ /**
+ * Specialization for diagonal view over non-Square matrix.
+ * A diagonal view is created over an intermediate gather
+ * view with a square structure.
+ */
+ template<
+ enum view::Views target_view = view::original,
+ typename SourceMatrix,
+ std::enable_if_t<
+ is_matrix< SourceMatrix >::value &&
+ target_view == view::diagonal &&
+ !structures::is_in< structures::Square, typename SourceMatrix::structure::inferred_structures >::value
+ > * = nullptr
+ >
+ typename internal::new_container_type_from<
+ typename SourceMatrix::template view_type< view::gather >::type
+ >::template change_structure< structures::Diagonal >::type
+ ::template view_type< view::diagonal >::type
+ get_view( SourceMatrix &source ) {
+
+ const size_t source_rows = nrows( source );
+ const size_t source_cols = ncols( source );
+ const size_t smaller_dimension = std::min( source_rows, source_cols );
+ auto square_view = get_view< structures::Diagonal >( source, utils::range( 0, smaller_dimension ), utils::range( 0, smaller_dimension ) );
+ return get_view< view::diagonal >( square_view );
+ }
+
+ /**
+ *
+ * @brief Generate an original view where the type is compliant with the source Matrix.
+ * Version where a target structure is specified. It can only generate a valid type if the target
+ * structure is the same as the source's
+ * or a more specialized one that would preserve its static properties (e.g., symmetric reference
+ * to a square matrix -- any assumption based on symmetry would not break those based on square).
+ * The function guarantees the created view is non-overlapping with other existing views only when the
+ * check can be performed in constant time.
+ *
+ * @tparam TargetStructure The target structure of the new view. It should verify
+ * alp::is_in .
+ * @tparam SourceMatrix The type of the source matrix
+ *
+ * @param source The source ALP matrix
+ *
+ * @return A new original view over the source ALP matrix.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This function performs
+ * \f$ \Theta(nref) \f$ amount of work where \f$ nref \f$ is the number
+ * of available views of \a source.
+ * -# A call to this function may use \f$ \mathcal{O}(1) \f$ bytes
+ * of memory beyond the memory in use at the function call entry.
+ * -# This function may make system calls.
+ * \endparblock
+ *
+ */
+ template<
+ typename TargetStructure,
+ typename SourceMatrix,
+ // enable_if_t (not enable_if): enable_if<false>* is a well-formed type, which
+ // would leave this overload unconstrained and ambiguous with other get_view overloads
+ std::enable_if_t< is_matrix< SourceMatrix >::value > * = nullptr
+ >
+ typename internal::new_container_type_from<
+ typename SourceMatrix::template view_type< view::original >::type
+ >::template change_structure< TargetStructure >::type
+ get_view( SourceMatrix &source ) {
+
+ static_assert( structures::is_in< typename SourceMatrix::structure, typename TargetStructure::inferred_structures >::value,
+ "Can only create a view when the target structure is compatible with the source." );
+
+ using target_strmat_t = typename internal::new_container_type_from<
+ typename SourceMatrix::template view_type< view::original >::type
+ >::template change_structure< TargetStructure >::type;
+
+ return target_strmat_t( source );
+ }
+
+ namespace internal {
+
+ /**
+ * Implement a gather through a View over compatible Structure using provided Index Mapping Functions.
+ * The compatibility depends on the TargetStructure, SourceStructure and IMFs, and is calculated during runtime.
+ */
+ template<
+ typename TargetStructure, typename TargetImfR, typename TargetImfC,
+ typename SourceMatrix,
+ std::enable_if_t< is_matrix< SourceMatrix >::value > * = nullptr
+ >
+ typename internal::new_container_type_from<
+ typename SourceMatrix::template view_type< view::gather >::type
+ >::template change_structure< TargetStructure >::_and_::
+ template change_imfr< TargetImfR >::_and_::
+ template change_imfc< TargetImfC >::type
+ get_view( SourceMatrix &source, TargetImfR imf_r, TargetImfC imf_c ) {
+
+ //if( std::dynamic_pointer_cast< imf::Select >( imf_r ) || std::dynamic_pointer_cast< imf::Select >( imf_c ) ) {
+ // throw std::runtime_error("Cannot gather with imf::Select yet.");
+ //}
+ // No static check as the compatibility depends on IMF, which is a runtime level parameter
+ //if( ! (TargetStructure::template isInstantiableFrom< Structure >( static_cast< TargetImfR & >( imf_r ), static_cast< TargetImfR & >( imf_c ) ) ) ) {
+ if( ! (structures::isInstantiable< typename SourceMatrix::structure, TargetStructure >::check( imf_r, imf_c ) ) ) {
+ throw std::runtime_error("Cannot gather into specified TargetStructure from provided SourceStructure and Index Mapping Functions.");
+ }
+
+ using target_t = typename internal::new_container_type_from<
+ typename SourceMatrix::template view_type< view::gather >::type
+ >::template change_structure< TargetStructure >::_and_::
+ template change_imfr< TargetImfR >::_and_::
+ template change_imfc< TargetImfC >::type;
+
+ return target_t( source, imf_r, imf_c );
+ }
+ } // namespace internal
+
+ /**
+ *
+ * @brief Generate an original view where the type is compliant with the source Matrix.
+ * Version where a range of rows and columns are selected to form a new view with specified target
+ * structure. It can only generate a valid type if the target
+ * structure is guaranteed to preserve the static properties of the source's structure.
+ * A structural check of this kind as well as non-overlapping checks with existing views of \a source
+ * are guaranteed only when each one of them incurs constant time work.
+ *
+ * @tparam TargetStructure The target structure of the new view. It should verify
+ * alp::is_in .
+ * @tparam SourceMatrix The type of source ALP matrix
+ *
+ * @param source The source ALP matrix
+ * @param rng_r A valid range of rows
+ * @param rng_c A valid range of columns
+ *
+ * @return A new original view over the source ALP matrix.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This function performs
+ * \f$ \Theta(nref) \f$ amount of work where \f$ nref \f$ is the number
+ * of available views of \a source.
+ * -# A call to this function may use \f$ \mathcal{O}(1) \f$ bytes
+ * of memory beyond the memory in use at the function call entry.
+ * -# This function may make system calls.
+ * \endparblock
+ *
+ */
+ template<
+ typename TargetStructure,
+ typename SourceMatrix,
+ std::enable_if_t< is_matrix< SourceMatrix >::value > * = nullptr
+ >
+ typename internal::new_container_type_from<
+ typename SourceMatrix::template view_type< view::gather >::type
+ >::template change_structure< TargetStructure >::type
+ get_view(
+ SourceMatrix &source,
+ const utils::range& rng_r, const utils::range& rng_c
+ ) {
+
+ return internal::get_view< TargetStructure >(
+ source,
+ std::move( imf::Strided( rng_r.count(), nrows(source), rng_r.start, rng_r.stride ) ),
+ std::move( imf::Strided( rng_c.count(), ncols(source), rng_c.start, rng_c.stride ) )
+ );
+ }
+
+ /**
+ *
+ * @brief Generate an original view where the type is compliant with the source Matrix.
+ * Version where no target structure is specified (in this case the structure of the source type is assumed as target)
+ * with row and column selection.
+ * A structure preserving check as well as non-overlapping checks with existing views of \a source
+ * are guaranteed only when each one of them incurs constant time work.
+ *
+ * @tparam SourceMatrix The type of source ALP matrix
+ *
+ * @param source The source matrix
+ * @param rng_r A valid range of rows
+ * @param rng_c A valid range of columns
+ *
+ * @return A new original view over the source structured matrix.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This function performs
+ * \f$ \Theta(nref) \f$ amount of work where \f$ nref \f$ is the number
+ * of available views of \a source.
+ * -# A call to this function may use \f$ \mathcal{O}(1) \f$ bytes
+ * of memory beyond the memory in use at the function call entry.
+ * -# This function may make system calls.
+ * \endparblock
+ *
+ */
+
+ template<
+ typename SourceMatrix,
+ std::enable_if_t< is_matrix< SourceMatrix >::value > * = nullptr
+ >
+ typename SourceMatrix::template view_type< view::gather >::type
+ get_view(
+ SourceMatrix &source,
+ const utils::range &rng_r,
+ const utils::range &rng_c
+ ) {
+
+ return internal::get_view< typename SourceMatrix::structure >(
+ source,
+ imf::Strided( rng_r.count(), nrows(source), rng_r.start, rng_r.stride ),
+ imf::Strided( rng_c.count(), ncols(source), rng_c.start, rng_c.stride ) );
+ }
+
+ /**
+ *
+ * @brief Generate a vector view on a column of the source matrix.
+ *
+ * @tparam SourceMatrix The type of the source ALP matrix
+ *
+ * @param source The source matrix
+ * @param rng_r A valid range of rows
+ * @param sel_c A valid column index
+ *
+ * @return A new gather view over the source ALP matrix.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This function performs
+ * \f$ \Theta(nref) \f$ amount of work where \f$ nref \f$ is the number
+ * of available views of \a source.
+ * -# A call to this function may use \f$ \mathcal{O}(1) \f$ bytes
+ * of memory beyond the memory in use at the function call entry.
+ * -# This function may make system calls.
+ * \endparblock
+ *
+ */
+ template<
+ typename SourceMatrix,
+ std::enable_if_t< is_matrix< SourceMatrix >::value > * = nullptr
+ >
+ typename internal::new_container_type_from<
+ typename SourceMatrix::template view_type< view::gather >::type
+ >::template change_container< alp::Vector >::_and_::
+ template change_structure< structures::General >::_and_::
+ template change_imfc< imf::Constant >::type
+ get_view(
+ SourceMatrix &source,
+ const utils::range &rng_r,
+ const size_t &sel_c
+ ) {
+ using target_t = typename internal::new_container_type_from<
+ typename SourceMatrix::template view_type< view::gather >::type
+ >::template change_container< alp::Vector >::_and_::
+ template change_structure< structures::General >::_and_::
+ template change_imfc< imf::Constant >::type;
+
+ return target_t(
+ source,
+ imf::Strided( rng_r.count(), nrows( source ), rng_r.start, rng_r.stride ),
+ imf::Constant( 1, ncols( source ), sel_c )
+ );
+ }
+
+ /**
+ *
+ * @brief Generate a vector view on a row of the source matrix.
+ *
+ * @tparam SourceMatrix The type of the source ALP matrix
+ *
+ * @param source The source matrix
+ * @param sel_r A valid row index
+ * @param rng_c A valid range of columns
+ *
+ * @return A new gather view over the source ALP matrix.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This function performs
+ * \f$ \Theta(nref) \f$ amount of work where \f$ nref \f$ is the number
+ * of available views of \a source.
+ * -# A call to this function may use \f$ \mathcal{O}(1) \f$ bytes
+ * of memory beyond the memory in use at the function call entry.
+ * -# This function may make system calls.
+ * \endparblock
+ *
+ * \note \internal Row-view is implemented as a column view over a
+ * transposed source matrix
+ *
+ */
+ template<
+ typename SourceMatrix,
+ std::enable_if_t< is_matrix< SourceMatrix >::value > * = nullptr
+ >
+ typename internal::new_container_type_from<
+ typename SourceMatrix::template view_type< view::transpose >::type::template view_type< view::gather >::type
+ >::template change_container< alp::Vector >::_and_::
+ template change_structure< structures::General >::_and_::
+ template change_imfc< imf::Constant >::type
+ get_view(
+ SourceMatrix &source,
+ const size_t &sel_r,
+ const utils::range &rng_c
+ ) {
+ auto source_transposed = get_view< view::transpose >( source );
+ return get_view( source_transposed, rng_c, sel_r );
+ }
+
+ /**
+ *
+ * Generate a dynamic gather view where the type is compliant with the source Matrix.
+ * Version where a selection of rows and columns, expressed as vectors of indices,
+ * forms a new view with specified target structure.
+ *
+ * @tparam TargetStructure The target structure of the new view. It should verify
+ * alp::is_in .
+ * @tparam SourceMatrix The type of the source ALP matrix
+ * @tparam SelectVectorR The type of the ALP vector defining permutation for rows
+ * @tparam SelectVectorC The type of the ALP vector defining permutation for columns
+ *
+ * @param source The source ALP matrix
+ * @param sel_r A valid permutation vector of a subset of row indices
+ * @param sel_c A valid permutation vector of a subset of column indices
+ *
+ * @return A new gather view over the source ALP matrix.
+ *
+ */
+ template<
+ typename TargetStructure,
+ typename SourceMatrix,
+ typename SelectVectorR, typename SelectVectorC,
+ std::enable_if_t<
+ is_matrix< SourceMatrix >::value &&
+ is_vector< SelectVectorR >::value &&
+ is_vector< SelectVectorC >::value
+ > * = nullptr
+ >
+ typename internal::new_container_type_from<
+ typename SourceMatrix::template view_type< view::gather >::type
+ >::template change_structure< TargetStructure >::_and_::
+ template change_imfr< imf::Select >::_and_::
+ template change_imfc< imf::Select >::type
+ get_view(
+ SourceMatrix &source,
+ const SelectVectorR &sel_r,
+ const SelectVectorC &sel_c
+ ) {
+ return internal::get_view< TargetStructure >(
+ source,
+ imf::Select( nrows( source ), sel_r ),
+ imf::Select( ncols( source ), sel_c )
+ );
+ }
+
+
+ /** Definitions of previously declared global methods that operate on ALP Matrix */
+ namespace internal {
+
+ template<
+ typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType>::value > *
+ >
+ bool getInitialized( const MatrixType &A ) noexcept {
+ // no 'template' disambiguator: getInitialized is a non-template member of MatrixBase
+ return static_cast< const MatrixBase< typename MatrixType::base_type > & >( A ).getInitialized();
+ }
+
+ template<
+ typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType>::value > *
+ >
+ void setInitialized( MatrixType &A, const bool initialized ) noexcept {
+ // no 'template' disambiguator: setInitialized is a non-template member of MatrixBase
+ return static_cast< MatrixBase< typename MatrixType::base_type > & >( A ).setInitialized( initialized );
+ }
+
+ template< typename DerivedMatrix >
+ std::pair< size_t, size_t > dims( const MatrixBase< DerivedMatrix > &A ) noexcept {
+ return A.dims();
+ }
+
+ /** Access the matrix element.
+ *
+ * @tparam MatrixType ALP Matrix type
+ *
+ * @param[in] A matrix to be accessed
+ * @param[in] storageIndex index in the physical iteration space
+ *
+ * @return For container matrices, returns a constant reference to the
+ * element at the given physical position of matrix A.
+ * For functor view matrices, returns a value corresponding to
+ * the given physical position of matrix A.
+ *
+ * \note This method may be used to access only elements local to the processor.
+ */
+ template<
+ typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType >::value > *
+ >
+ typename MatrixType::const_access_type access( const MatrixType &A, const typename MatrixType::storage_index_type &storageIndex ) {
+ return static_cast<
+ const MatrixBase< typename MatrixType::base_type > &
+ >( A ).template access< typename MatrixType::const_access_type, typename MatrixType::storage_index_type >( storageIndex );
+ }
+
+ /** Non-constant variant. **/
+ template<
+ typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType >::value > *
+ >
+ typename MatrixType::access_type access( MatrixType &A, const typename MatrixType::storage_index_type &storageIndex ) {
+ return static_cast<
+ MatrixBase< typename MatrixType::base_type > &
+ >( A ).template access< typename MatrixType::access_type, typename MatrixType::storage_index_type >( storageIndex );
+ }
+
+		/** Return a storage index in the physical layout.
+		 *
+		 * @tparam MatrixType ALP Matrix type
+		 *
+		 * @param[in] A matrix to be accessed
+		 * @param[in] i row-index in the logical layout
+		 * @param[in] j column-index in the logical layout
+		 * @param[in] s process ID
+		 * @param[in] P total number of processors
+		 *
+		 * @return The index into the physical (storage) iteration space of
+		 *         matrix A that corresponds to the logical coordinates
+		 *         ( i, j ) on process s out of P processes; delegates to
+		 *         the getStorageIndex member of MatrixBase.
+		 *
+		 */
+		template<
+			typename MatrixType,
+			std::enable_if_t< is_matrix< MatrixType >::value > *
+		>
+		typename MatrixType::storage_index_type getStorageIndex( const MatrixType &A, const size_t i, const size_t j, const size_t s, const size_t P ) {
+			return static_cast< const MatrixBase< typename MatrixType::base_type > & >( A ).template getStorageIndex< typename MatrixType::storage_index_type >( i, j, s, P );
+		}
+
+ /** Return a pair of coordinates in logical layout.
+ *
+ * @tparam MatrixType ALP Matrix type
+ *
+ * @param[in] A matrix to be accessed
+ * @param[in] storageIndex storage index in the physical layout.
+ * @param[in] s process ID
+ * @param[in] P total number of processors
+ *
+ * @return Returns a pair of coordinates in logical iteration space
+ * that correspond to the provided storage index in the
+ * physical iteration space.
+ *
+ */
+ template< typename MatrixType >
+ std::pair< size_t, size_t > getCoords( const MatrixType &A, const size_t storageIndex, const size_t s, const size_t P );
+
+ } // namespace internal
+
+ template< typename D, typename Structure, typename View, typename ImfR, typename ImfC, enum Backend backend >
+ size_t nrows( const Matrix< D, Structure, Density::Dense, View, ImfR, ImfC, backend > &A ) noexcept {
+ return dims( A ).first;
+ }
+
+ template< typename D, typename Structure, typename View, typename ImfR, typename ImfC, enum Backend backend >
+ size_t ncols( const Matrix< D, Structure, Density::Dense, View, ImfR, ImfC, backend > &A ) noexcept {
+ return dims( A ).second;
+ }
+
+ template< typename D, typename Structure, typename View, typename ImfR, typename ImfC, enum Backend backend >
+ std::pair< size_t, size_t > dims( const Matrix< D, Structure, Density::Dense, View, ImfR, ImfC, backend > &A ) noexcept {
+ return internal::dims( static_cast< const internal::MatrixBase<
+ typename Matrix< D, Structure, Density::Dense, View, ImfR, ImfC, backend >::base_type > & > ( A ) );
+ }
+
+ namespace structures {
+
+ template<
+ size_t band,
+ typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType >::value > * = nullptr
+ >
+ std::ptrdiff_t get_lower_limit( const MatrixType &A ) {
+
+ return structures::get_lower_limit< band, typename MatrixType::structure >( nrows( A ) );
+
+ }
+
+ template<
+ size_t band,
+ typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType >::value > * = nullptr
+ >
+ std::ptrdiff_t get_upper_limit( const MatrixType &A ) {
+
+ return structures::get_upper_limit< band, typename MatrixType::structure >( ncols( A ) );
+
+ }
+
+ /**
+ * Specialization for reference backend.
+ * @see alp::structures::calculate_row_coordinate_limits
+ */
+ template<
+ size_t band_index, typename MatrixType,
+ std::enable_if_t<
+ is_matrix< MatrixType >::value
+ > *
+ >
+ std::pair< size_t, size_t > calculate_row_coordinate_limits( const MatrixType &A ) {
+
+ using Structure = typename MatrixType::structure;
+
+ static_assert(
+ band_index < std::tuple_size< typename Structure::band_intervals >::value,
+ "Provided band index is out of bounds."
+ );
+
+ // cast matrix dimensions to signed integer to allow for comparison with negative numbers
+ const std::ptrdiff_t M = static_cast< std::ptrdiff_t >( nrows( A ) );
+ const std::ptrdiff_t N = static_cast< std::ptrdiff_t >( ncols( A ) );
+
+ // band limits are negated and inverted due to different orientation
+ // of coordinate system of band and matrix dimensions.
+ const std::ptrdiff_t l = -structures::get_upper_limit< band_index >( A );
+ const std::ptrdiff_t u = N - structures::get_lower_limit< band_index >( A );
+
+ // fit the limits within the matrix dimensions
+ const size_t lower_limit = static_cast< size_t >( std::max( std::min( l, M ), static_cast< std::ptrdiff_t >( 0 ) ) );
+ const size_t upper_limit = static_cast< size_t >( std::max( std::min( u, M ), static_cast< std::ptrdiff_t >( 0 ) ) );
+
+ assert( lower_limit <= upper_limit );
+
+ return std::make_pair( lower_limit, upper_limit );
+ }
+
+ /**
+ * Specialization for reference backend.
+ * @see alp::structures::calculate_column_coordinate_limits
+ */
+ template<
+ size_t band_index, typename MatrixType,
+ std::enable_if_t<
+ is_matrix< MatrixType >::value
+ > *
+ >
+ std::pair< size_t, size_t > calculate_column_coordinate_limits( const MatrixType &A, const size_t row ) {
+
+ using Structure = typename MatrixType::structure;
+
+ // Declaring this to avoid static casts to std::ptrdiff_t in std::min and std::max calls
+ const std::ptrdiff_t signed_zero = 0;
+
+ static_assert(
+ band_index < std::tuple_size< typename Structure::band_intervals >::value,
+ "Provided band index is out of bounds."
+ );
+
+ assert( row < nrows( A ) );
+
+ // cast matrix dimensions to signed integer to allow for comparison with negative numbers
+ const std::ptrdiff_t N = static_cast< std::ptrdiff_t >( ncols( A ) );
+
+ constexpr bool is_sym = structures::is_a< Structure, structures::Symmetric >::value;
+ // Temporary until adding multiple symmetry directions
+ constexpr bool sym_up = is_sym;
+
+ // Band limits
+ const std::ptrdiff_t l = structures::get_lower_limit< band_index >( A );
+ const std::ptrdiff_t u = structures::get_upper_limit< band_index >( A );
+
+ // Band limits taking into account symmetry
+ const std::ptrdiff_t sym_l = is_sym && sym_up ? std::max( signed_zero, l ) : l;
+ const std::ptrdiff_t sym_u = is_sym && !sym_up ? std::min( signed_zero, u ) : u;
+
+ // column coordinate lower and upper limits considering the provided row coordinate
+ const std::ptrdiff_t sym_l_row = static_cast< std::ptrdiff_t >( row ) + sym_l;
+ const std::ptrdiff_t sym_u_row = sym_l_row + ( sym_u - sym_l );
+
+ // fit the limits within the matrix dimensions
+ const size_t lower_limit = static_cast< size_t >( std::max( std::min( sym_l_row, N ), signed_zero ) );
+ const size_t upper_limit = static_cast< size_t >( std::max( std::min( sym_u_row, N ), signed_zero ) );
+
+ assert( lower_limit <= upper_limit );
+
+ return std::make_pair( lower_limit, upper_limit );
+ }
+
+ } // namespace structures
+
+ namespace structures {
+ namespace constant {
+
+			/** Returns (by value) a functor-backed Identity matrix of the provided size */
+			template< typename T, Backend backend >
+			const Matrix<
+				T, structures::Identity, Density::Dense,
+				view::Functor< std::function< const T( const size_t, const size_t ) > >,
+				imf::Id, imf::Id, backend
+			>
+			I( const size_t n ) {
+
+				return Matrix<
+					T, structures::Identity, Density::Dense,
+					view::Functor< std::function< const T( const size_t, const size_t ) > >,
+					imf::Id, imf::Id, backend
+				>(
+					[]( const size_t i, const size_t j ) {
+						return ( i == j ) ? 1 : 0;
+					},
+					n,
+					n
+				);
+			}
+
+			/** Returns (by value) a functor-backed Zero matrix of the provided size */
+			template< typename T, Backend backend >
+			const Matrix<
+				T, structures::Zero, Density::Dense,
+				view::Functor< std::function< const T( const size_t, const size_t ) > >,
+				imf::Id, imf::Id, backend
+			>
+			Zero( const size_t rows, const size_t cols ) {
+				return Matrix<
+					T, structures::Zero, Density::Dense,
+					view::Functor< std::function< const T( const size_t, const size_t ) > >,
+					imf::Id, imf::Id, backend
+				> (
+					[]( const size_t, const size_t ) {
+						return 0;
+					},
+					rows,
+					cols
+				);
+			}
+
+ namespace internal {
+
+				/** Returns a constant reference to a matrix representing Givens rotation
+				 * of the provided size n and parameters i, j, s and c, where
+				 * s = sin( theta ) and c = cos( theta ). \warning The returned matrix is heap-allocated and never freed (leaks).
+				 */
+				template< typename T, Backend backend >
+				const Matrix<
+					T, structures::Square, Density::Dense,
+					view::Original< void >, imf::Id, imf::Id, backend
+				> &
+				Givens( const size_t n, const size_t i, const size_t j, const T s, const T c ) {
+					using return_type = const Matrix<
+						T, structures::Square, Density::Dense, view::Original< void >, imf::Id, imf::Id, backend
+					>;
+					return_type * ret = new return_type( n );
+					// TODO: initialize matrix values according to the provided parameters, and manage ownership (the allocation above is currently leaked)
+					return * ret;
+				}
+
+ } // namespace internal
+ } // namespace constant
+ } // namespace structures
+} // namespace alp
+
+#endif // end ``_H_ALP_AMF_BASED_MATRIX''
diff --git a/include/alp/amf-based/storage.hpp b/include/alp/amf-based/storage.hpp
new file mode 100644
index 000000000..7d5561339
--- /dev/null
+++ b/include/alp/amf-based/storage.hpp
@@ -0,0 +1,1201 @@
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *
+ * @file
+ *
+ * This file registers mechanisms for coordinate mapping between
+ * logical and physical iteration spaces.
+ *
+ */
+
+#ifndef _H_ALP_AMF_BASED_STORAGE
+#define _H_ALP_AMF_BASED_STORAGE
+
+#include
+#include
+
+#include
+#include
+#include
+
+
+namespace alp {
+
+ namespace internal {
+
+ /**
+ * Determines the mapping polynomial type and exposes a factory method
+ * to create instances of that polynomial.
+ *
+ * All specializations of this type trait should define the factory
+ * method following the same signature. The factory method shall
+ * return an object of the type exposed as \a type.
+ *
+ * @tparam Structure Matrix structure
+ * @tparam ImfR Row IMF type
+ * @tparam ImfC Column IMF type
+ * @tparam backend The backend
+ *
+ */
+ template< typename Structure, typename ImfR, typename ImfC, enum Backend backend >
+ struct determine_poly_factory {};
+
+ } // namespace internal
+
+ namespace storage {
+
+ enum StorageOrientation {
+ ROW_WISE,
+ COLUMN_WISE
+ };
+
+ enum StoredPart {
+ UPPER,
+ LOWER
+ };
+
+ /**
+ * The namespace containts polynomials used to map coordinates
+ * between logical and physical iteration spaces,
+ * associated type traits and helper classes.
+ */
+ namespace polynomials {
+
+ /**
+ * Implements the polynomial
+ * ( A*a*x^2 + B*b*y^2 + C*c*x*y + D*d*x + E*e*y + F*f ) / Denominator
+ * where uppercase coefficients are compile-time constant,
+ * lowercase coefficients are run-time constant,
+ * and x and y are variables.
+ * All coefficients and variables are integers and all operations are integer
+ * operations.
+ *
+ * The purpose of compile-time constant coefficients is to allow compile-time
+ * optimizations for zero terms/monomials.
+ *
+			 * Denominator allows for implementation of polynomials with integer division,
+ * e.g., n * ( n + 1 ) / 2,
+ * while avoiding the need for floating point coefficients and operations.
+ *
+ * @tparam Ax2 Static coefficient corresponding to x^2
+ * @tparam Ay2 Static coefficient corresponding to y^2
+ * @tparam Axy Static coefficient corresponding to x*y
+ * @tparam Ax Static coefficient corresponding to x
+ * @tparam Ay Static coefficient corresponding to y
+ * @tparam A0 Static coefficient corresponding to constant term
+ * @tparam Denominator Static denominator dividing the whole polynomial
+ */
+ template<
+ size_t coeffAx2, size_t coeffAy2, size_t coeffAxy,
+ size_t coeffAx, size_t coeffAy,
+ size_t coeffA0,
+ size_t Denominator
+ >
+ struct BivariateQuadratic {
+
+ static_assert( Denominator != 0, "Denominator cannot be zero (division by zero).");
+ typedef int64_t dyn_coef_t;
+
+ static constexpr size_t Ax2 = coeffAx2;
+ static constexpr size_t Ay2 = coeffAy2;
+ static constexpr size_t Axy = coeffAxy;
+ static constexpr size_t Ax = coeffAx;
+ static constexpr size_t Ay = coeffAy;
+ static constexpr size_t A0 = coeffA0;
+ static constexpr size_t D = Denominator;
+ const dyn_coef_t ax2, ay2, axy, ax, ay, a0;
+
+ BivariateQuadratic(
+ const dyn_coef_t ax2, const dyn_coef_t ay2, const dyn_coef_t axy,
+ const dyn_coef_t ax, const dyn_coef_t ay,
+ const dyn_coef_t a0 ) :
+ ax2( ax2 ), ay2( ay2 ), axy( axy ),
+ ax( ax ), ay( ay ),
+ a0( a0 ) {}
+
+ size_t evaluate( const size_t x, const size_t y ) const {
+ return (Ax2 * ax2 * x * x +
+ Ay2 * ay2 * y * y +
+ Axy * axy * x * y +
+ Ax * ax * x +
+ Ay * ay * y +
+ A0 * a0) / D;
+ }
+
+ }; // BivariateQuadratic
+
+ /** \internal Defines the interface implemented by other polynomial factories */
+ struct AbstractFactory {
+
+ /** \internal Defines the type of the polynomial returned by Create */
+ typedef BivariateQuadratic< 0, 0, 0, 0, 0, 0, 1 > poly_type;
+
+ /** \internal Instantiates a polynomial */
+ static poly_type Create( const size_t rows, const size_t cols );
+
+ /** \internal Returns the size of storage associated with the defined polynomial */
+ static size_t GetStorageDimensions( const size_t rows, const size_t cols );
+
+ }; // struct AbstractFactory
+
+ /** p(i,j) = 0 */
+ struct NoneFactory {
+
+ typedef BivariateQuadratic< 0, 0, 0, 0, 0, 0, 1 > poly_type;
+
+ static poly_type Create( const size_t rows, const size_t cols ) {
+ (void) rows;
+ (void) cols;
+ return poly_type( 0, 0, 0, 0, 0, 0 );
+ }
+
+ static size_t GetStorageDimensions( const size_t rows, const size_t cols ) {
+ (void) rows;
+ (void) cols;
+ return 0;
+ }
+ }; // struct NoneFactory
+
+ /** p(i,j) = Ni + j */
+ template< bool row_major = true >
+ struct FullFactory {
+
+ typedef BivariateQuadratic< 0, 0, 0, 1, 1, 0, 1 > poly_type;
+
+ static poly_type Create( const size_t rows, const size_t cols ) {
+ if( row_major ){
+ return poly_type( 0, 0, 0, cols, 1, 0 );
+ } else {
+ return poly_type( 0, 0, 0, 1, rows, 0 );
+ }
+ }
+
+ static size_t GetStorageDimensions( const size_t rows, const size_t cols ) {
+ return rows * cols;
+ }
+ }; // struct FullFactory
+
+ /** Implements packed, triangle-like storage */
+ template< enum StoredPart stored_part, enum StorageOrientation orientation >
+ struct PackedFactory;
+
+ /** p(i,j) = (-i^2 + (2N - 1)i + 2j) / 2 */
+ template<>
+ struct PackedFactory< UPPER, ROW_WISE > {
+
+ typedef BivariateQuadratic< 1, 0, 0, 1, 2, 0, 2 > poly_type;
+
+ static poly_type Create( const size_t rows, const size_t cols ) {
+#ifdef NDEBUG
+ (void) cols;
+ (void) rows;
+#endif
+ assert( rows == cols );
+ return poly_type( -1, 0, 0, 2 * cols - 1, 1, 0 );
+ }
+
+ static size_t GetStorageDimensions( const size_t rows, const size_t cols ) {
+#ifdef NDEBUG
+ (void) cols;
+#endif
+ assert( rows == cols );
+ return rows * ( rows + 1 ) / 2;
+ }
+ };
+
+ /** p(i,j) = (j^2 + 2i + j) / 2 */
+ template<>
+ struct PackedFactory< UPPER, COLUMN_WISE > {
+
+ typedef BivariateQuadratic< 0, 1, 0, 2, 1, 0, 2 > poly_type;
+
+ static poly_type Create( const size_t rows, const size_t cols ) {
+#ifdef NDEBUG
+ (void) cols;
+ (void) rows;
+#endif
+ assert( rows == cols );
+ return poly_type( 0, 1, 0, 1, 1, 0 );
+ }
+
+ static size_t GetStorageDimensions( const size_t rows, const size_t cols ) {
+#ifdef NDEBUG
+ (void) cols;
+#endif
+ assert( rows == cols );
+ return rows * ( rows + 1 ) / 2;
+ }
+ }; // struct PackedFactory
+
+ /** p(i,j) = (i^2 + i + 2j) / 2 */
+ template<>
+ struct PackedFactory< LOWER, ROW_WISE > {
+
+ typedef BivariateQuadratic< 1, 0, 0, 1, 2, 0, 2 > poly_type;
+
+ static poly_type Create( const size_t rows, const size_t cols ) {
+#ifdef NDEBUG
+ (void) cols;
+ (void) rows;
+#endif
+ assert( rows == cols );
+ return poly_type( 1, 0, 0, 1, 1, 0 );
+ }
+
+ static size_t GetStorageDimensions( const size_t rows, const size_t cols ) {
+#ifdef NDEBUG
+ (void) cols;
+#endif
+ assert( rows == cols );
+ return rows * ( rows + 1 ) / 2;
+ }
+ }; // struct PackedFactory
+
+ /** p(i,j) = (-j^2 + 2i + (2M - 1)j) / 2 */
+ template<>
+ struct PackedFactory< LOWER, COLUMN_WISE > {
+
+ typedef BivariateQuadratic< 0, 1, 0, 2, 1, 0, 2 > poly_type;
+
+ static poly_type Create( const size_t rows, const size_t cols ) {
+#ifdef NDEBUG
+ (void) rows;
+ (void) cols;
+#endif
+ assert( rows == cols );
+ return poly_type( 0, -1, 0, 1, 2 * rows - 1, 0 );
+ }
+
+ static size_t GetStorageDimensions( const size_t rows, const size_t cols ) {
+#ifdef NDEBUG
+ (void) cols;
+#endif
+ assert( rows == cols );
+ return rows * ( rows + 1 ) / 2;
+ }
+ };
+
+ template< size_t l, size_t u, bool row_wise >
+ struct BandFactory {
+
+ typedef BivariateQuadratic< 0, 0, 0, 0, 0, 0, 1 > poly_type;
+
+ static poly_type Create( const size_t rows, const size_t cols ) {
+ (void) rows;
+ (void) cols;
+ throw std::runtime_error( "Needs an implementation." );
+ }
+
+ static size_t GetStorageDimensions( const size_t rows, const size_t cols ) {
+ (void) rows;
+ (void) cols;
+ throw std::runtime_error( "Needs an implementation." );
+ }
+ }; // struct BandFactory
+
+ struct ArrayFactory {
+ /** p(i,j) = i */
+ typedef BivariateQuadratic< 0, 0, 0, 1, 0, 0, 1 > poly_type;
+
+ static poly_type Create( const size_t rows, const size_t cols ) {
+ (void) rows;
+ (void) cols;
+ return poly_type( 0, 0, 0, 1, 0, 0 );
+ }
+
+ static size_t GetStorageDimensions( const size_t rows, const size_t cols ) {
+ assert( ( rows == 1 ) || ( cols == 1 ) );
+ return rows * cols;
+ }
+ };
+
+ template< enum view::Views view, typename Polynomial >
+ struct apply_view {};
+
+ template< typename Polynomial >
+ struct apply_view< view::original, Polynomial > {
+ typedef Polynomial type;
+ };
+
+ template< typename Polynomial >
+ struct apply_view< view::transpose, Polynomial > {
+ typedef BivariateQuadratic< Polynomial::Ay2, Polynomial::Ax2, Polynomial::Axy, Polynomial::Ay, Polynomial::Ax, Polynomial::A0, Polynomial::D > type;
+ };
+
+ template< typename Polynomial >
+ struct apply_view< view::diagonal, Polynomial > {
+ typedef Polynomial type;
+ };
+
+ template< typename Polynomial >
+ struct apply_view< view::_internal, Polynomial > {
+ typedef typename NoneFactory::poly_type type;
+ };
+
+ /**
+ * Specifies the resulting IMF and Polynomial types after fusing
+ * the provided IMF and Polynomial and provides two factory methods
+ * to create the IMF and the Polynomial of the resulting types.
+ * In the general case, the fusion does not happen and the resulting
+ * types are equal to the provided types.
+ */
+ template< typename Imf, typename Poly >
+ struct fuse_on_i {
+
+ typedef Imf resulting_imf_type;
+ typedef Poly resulting_polynomial_type;
+
+ static resulting_imf_type CreateImf( Imf imf ) {
+ return imf;
+ }
+
+ static resulting_polynomial_type CreatePolynomial( Imf imf, Poly p ) {
+ (void) imf;
+ return p;
+ }
+ };
+
+ /**
+ * Specialization for Id IMF.
+ */
+ template< typename Poly >
+ struct fuse_on_i< imf::Id, Poly > {
+
+ /** The resulting IMF is an Id because strided IMF is fully fused into the polynomial */
+ typedef imf::Id resulting_imf_type;
+
+ /** Some static factors change after injecting strided IMF into the polynomial */
+ typedef Poly resulting_polynomial_type;
+
+ static resulting_imf_type CreateImf( imf::Id imf ) {
+ return imf::Id( imf.n );
+ }
+
+ static resulting_polynomial_type CreatePolynomial( imf::Id imf, Poly p ) {
+ (void)imf;
+ return p;
+ }
+ };
+
+ /**
+ * Specialization for strided IMF.
+ */
+ template< typename Poly >
+ struct fuse_on_i< imf::Strided, Poly> {
+
+ /** The resulting IMF is an Id because strided IMF is fully fused into the polynomial */
+ typedef imf::Id resulting_imf_type;
+
+ /** Some static factors change after injecting strided IMF into the polynomial */
+ typedef BivariateQuadratic<
+ Poly::Ax2, Poly::Ay2, Poly::Axy,
+ Poly::Ax2 || Poly::Ax, Poly::Axy || Poly::Ay,
+ Poly::Ax2 || Poly::Ax || Poly::A0,
+ Poly::D
+ > resulting_polynomial_type;
+
+ static resulting_imf_type CreateImf( imf::Strided imf ) {
+ return imf::Id( imf.n );
+ }
+
+ static resulting_polynomial_type CreatePolynomial( imf::Strided imf, Poly p ) {
+ return resulting_polynomial_type(
+ p.ax2 * imf.s * imf.s, // ax2
+ p.ay2, // ay2
+ p.axy * imf.s, // axy
+ 2 * Poly::Ax2 * p.ax2 * imf.s * imf.b + Poly::Ax * p.ax * imf.s, // ax
+ Poly::Ay * p.ay + Poly::Axy * p.axy * imf.b, // ay
+ Poly::Ax2 * p.ax2 * imf.b * imf.b + Poly::Ax * p.ax * imf.b + Poly::A0 * p.a0 // A0
+ );
+ }
+ };
+
+ /**
+ * Specialization for zero IMF.
+ */
+ template< typename Poly >
+ struct fuse_on_i< imf::Zero, Poly> {
+
+ /** The resulting IMF is an Id because strided IMF is fully fused into the polynomial */
+ typedef imf::Id resulting_imf_type;
+
+ /** Some static factors change after injecting strided IMF into the polynomial */
+ typedef BivariateQuadratic<
+ 0, Poly::Ay2, 0,
+ 0, Poly::Ay,
+ Poly::A0,
+ Poly::D
+ > resulting_polynomial_type;
+
+ static resulting_imf_type CreateImf( imf::Zero imf ) {
+ return imf::Id( imf.n );
+ }
+
+ static resulting_polynomial_type CreatePolynomial( imf::Zero imf, Poly p ) {
+ (void)imf;
+ return resulting_polynomial_type(
+ 0, // ax2
+ p.ay2, // ay2
+ 0, // axy
+ 0, // ax
+ p.ay, // ay
+ p.a0 // A0
+ );
+ }
+ };
+
+ template< typename Imf, typename Poly >
+ struct fuse_on_j {
+
+ typedef Imf resulting_imf_type;
+ typedef Poly resulting_polynomial_type;
+
+ static resulting_imf_type CreateImf( Imf imf ) {
+ return imf;
+ }
+
+ static resulting_polynomial_type CreatePolynomial( Imf imf, Poly p ) {
+ (void) imf;
+ return p;
+ }
+ };
+
+ /**
+ * Specialization for Id IMF.
+ */
+ template< typename Poly >
+ struct fuse_on_j< imf::Id, Poly > {
+
+ /** The resulting IMF is an Id because strided IMF is fully fused into the polynomial */
+ typedef imf::Id resulting_imf_type;
+
+ /** Some static factors change after injecting strided IMF into the polynomial */
+ typedef Poly resulting_polynomial_type;
+
+ static resulting_imf_type CreateImf( imf::Id imf ) {
+ return imf::Id( imf.n );
+ }
+
+ static resulting_polynomial_type CreatePolynomial( imf::Id imf, Poly p ) {
+ (void)imf;
+ return p;
+ }
+ };
+
+ /**
+ * Specialization for strided IMF.
+ */
+ template< typename Poly >
+ struct fuse_on_j< imf::Strided, Poly > {
+
+ /** The resulting IMF is an Id because strided IMF is fully fused into the polynomial */
+ typedef imf::Id resulting_imf_type;
+
+ /** Some static factors change after injecting strided IMF into the polynomial */
+ typedef BivariateQuadratic<
+ Poly::Ax2, Poly::Ay2, Poly::Axy,
+ Poly::Axy || Poly::Ax, Poly::Ay2 || Poly::Ay,
+ Poly::Ay2 || Poly::Ay || Poly::A0,
+ Poly::D
+ > resulting_polynomial_type;
+
+ static resulting_imf_type CreateImf( imf::Strided imf ) {
+ return imf::Id( imf.n );
+ }
+
+ static resulting_polynomial_type CreatePolynomial( imf::Strided imf, Poly p ) {
+ return resulting_polynomial_type(
+ p.ax2, // ax2
+ p.ay2 * imf.s * imf.s, // ay2
+ p.axy * imf.s, // axy
+ Poly::Ax * p.ax + Poly::Axy * p.axy * imf.b, // ax
+ 2 * Poly::Ay2 * p.ay2 * imf.s * imf.b + Poly::Ay * p.ay * imf.s, // ay
+ Poly::Ay2 * p.ay2 * imf.b * imf.b + Poly::Ay * p.ay * imf.b + Poly::A0 * p.a0 // A0
+ );
+ }
+ };
+
+ /**
+ * Specialization for constant-mapping IMF.
+ */
+ template< typename Poly >
+ struct fuse_on_j< imf::Constant, Poly > {
+
+ /** The resulting IMF is an Id because strided IMF is fully fused into the polynomial */
+ typedef imf::Id resulting_imf_type;
+
+ /** j factors contribute to the constant factor, while they become 0 */
+ typedef BivariateQuadratic<
+ Poly::Ax2, 0, 0,
+ Poly::Ax || Poly::Axy, 0,
+ Poly::A0 || Poly::Ay || Poly::Ay2,
+ Poly::D
+ > resulting_polynomial_type;
+
+ static resulting_imf_type CreateImf( imf::Constant imf ) {
+ return imf::Id( imf.n );
+ }
+
+ static resulting_polynomial_type CreatePolynomial( imf::Constant imf, Poly p ) {
+ return resulting_polynomial_type(
+ p.ax2, // ax2
+ 0, // ay2
+ 0, // axy
+ Poly::Ax * p.ax +
+ Poly::Axy * p.axy * imf.b, // ax
+ 0, // ay
+ Poly::A0 * p.a0 +
+ Poly::Ay * p.ay * imf.b +
+ Poly::Ay2 * p.ay2 * imf.b * imf.b // A0
+ );
+ }
+ };
+
+ /**
+ * Specialization for zero IMF.
+ */
+ template< typename Poly >
+ struct fuse_on_j< imf::Zero, Poly > {
+
+ /** The resulting IMF is an Id because strided IMF is fully fused into the polynomial */
+ typedef imf::Id resulting_imf_type;
+
+ /** Some static factors change after injecting strided IMF into the polynomial */
+ typedef BivariateQuadratic<
+ Poly::Ax2, 0, 0,
+ Poly::Ax, 0,
+ Poly::A0,
+ Poly::D
+ > resulting_polynomial_type;
+
+ static resulting_imf_type CreateImf( imf::Zero imf ) {
+ return imf::Id( imf.n );
+ }
+
+ static resulting_polynomial_type CreatePolynomial( imf::Zero imf, Poly p ) {
+ (void)imf;
+ return resulting_polynomial_type(
+ p.ax2, // ax2
+ 0, // ay2
+ 0, // axy
+ p.ax, // ax
+ 0, // ay
+ p.a0 // A0
+ );
+ }
+ };
+
+ }; // namespace polynomials
+
+ /** Forward declaration */
+ template< enum Backend backend >
+ class AMFFactory;
+
+ /**
+ * Access Mapping Function (AMF) maps logical matrix coordinates (i, j)
+ * to the corresponding matrix element's location in the physical container.
+ *
+ * To calculate the mapping, the AMF first applies logical-to-logical
+ * mapping provided by one IMF per coordinate (row and column).
+ * A bivariate polynomial (called mapping polynomial) takes these two
+ * output coordinates as inputs to calculate the position is physical
+ * storage of the requested element (logical-to-physical mapping).
+ *
+ * For certain combinations of IMFs and mapping polynomial types it is
+ * possible to fuse the index computation into a single function call.
+ * AMF specializations for such IMF and polynomial types are free to do
+ * any optimizations.
+ *
+ * All AMF specializations shall expose the effective types of the IMFs
+ * and the mapping polynomial, since these may change after the fusion.
+ */
+ template<
+ typename ImfR, typename ImfC, typename MappingPolynomial,
+ enum Backend backend
+ >
+ class AMF {
+
+ friend class AMFFactory< backend >;
+
+ public:
+
+ /** Expose static properties */
+ typedef ImfR imf_r_type;
+ typedef ImfC imf_c_type;
+ typedef MappingPolynomial mapping_polynomial_type;
+
+ private:
+
+ const imf_r_type imf_r;
+ const imf_c_type imf_c;
+ const mapping_polynomial_type map_poly;
+ const size_t storage_dimensions;
+
+ AMF( ImfR imf_r, ImfC imf_c, MappingPolynomial map_poly, const size_t storage_dimensions ) :
+ imf_r( imf_r ), imf_c( imf_c ), map_poly( map_poly ), storage_dimensions( storage_dimensions ) {}
+
+ AMF( const AMF & ) = delete;
+ AMF &operator=( const AMF & ) = delete;
+
+ public:
+
+ AMF( AMF &&amf ) :
+ imf_r( std::move( amf.imf_r ) ),
+ imf_c( std::move( amf.imf_c ) ),
+ map_poly( std::move( amf.map_poly ) ),
+ storage_dimensions( std::move( amf.storage_dimensions ) ) {}
+
+ /**
+ * Returns dimensions of the logical layout of the associated container.
+ *
+ * @return A pair of two values, number of rows and columns, respectively.
+ */
+ std::pair< size_t, size_t> getLogicalDimensions() const {
+ return std::make_pair( imf_r.n, imf_c.n );
+ }
+
+ /**
+ * Returns dimensions of the physical layout of the associated container.
+ *
+ * @return The size of the physical container.
+ */
+ std::size_t getStorageDimensions() const {
+ return storage_dimensions;
+ }
+
+ /**
+ * @brief Returns a storage index based on the coordinates in the
+ * logical iteration space.
+ *
+ * @tparam R ImfR type
+ * @tparam C ImfC type
+ *
+ * @param[in] i row-coordinate
+ * @param[in] j column-coordinate
+ * @param[in] s current process ID
+ * @param[in] P total number of processes
+ *
+ * @return storage index corresponding to the provided logical
+ * coordinates and parameters s and P.
+ *
+ * \note It is not necessary to call imf.map() function if the imf
+ * has the type imf::Id. To implement SFINAE-driven selection
+ * of the getStorageIndex, dummy parameters R and C are added.
+ * They are set to the ImfR and ImfC by default and a static
+ * assert ensures that external caller does not force a call
+ * to wrong implementation by explicitly specifying values
+ * for R and/or C.
+ *
+ */
+ template<
+ typename R = ImfR, typename C = ImfC,
+ std::enable_if_t< !std::is_same< R, imf::Id >::value && !std::is_same< C, imf::Id >::value > * = nullptr
+ >
+ size_t getStorageIndex( const size_t i, const size_t j, const size_t s, const size_t P ) const {
+ static_assert(
+ std::is_same< R, ImfR >::value && std::is_same< C, ImfC >::value,
+ "Explicit specialization of getStorageIndex is not allowed."
+ );
+ (void)s;
+ (void)P;
+ return map_poly.evaluate( imf_r.map( i ), imf_c.map( j ) );
+ }
+
+ template<
+ typename R = ImfR, typename C = ImfC,
+ std::enable_if_t< std::is_same< R, imf::Id >::value && !std::is_same< C, imf::Id >::value > * = nullptr
+ >
+ size_t getStorageIndex( const size_t i, const size_t j, const size_t s, const size_t P ) const {
+ static_assert(
+ std::is_same< R, ImfR >::value && std::is_same< C, ImfC >::value,
+ "Explicit specialization of getStorageIndex is not allowed."
+ );
+ (void)s;
+ (void)P;
+ return map_poly.evaluate( i, imf_c.map( j ) );
+ }
+
+ template<
+ typename R = ImfR, typename C = ImfC,
+ std::enable_if_t< !std::is_same< R, imf::Id >::value && std::is_same< C, imf::Id >::value > * = nullptr
+ >
+ size_t getStorageIndex( const size_t i, const size_t j, const size_t s, const size_t P ) const {
+ static_assert(
+ std::is_same< R, ImfR >::value && std::is_same< C, ImfC >::value,
+ "Explicit specialization of getStorageIndex is not allowed."
+ );
+ (void)s;
+ (void)P;
+ return map_poly.evaluate( imf_r.map( i ), j );
+ }
+
+ template<
+ typename R = ImfR, typename C = ImfC,
+ std::enable_if_t< std::is_same< R, imf::Id >::value && std::is_same< C, imf::Id >::value > * = nullptr
+ >
+ size_t getStorageIndex( const size_t i, const size_t j, const size_t s, const size_t P ) const {
+ static_assert(
+ std::is_same< R, ImfR >::value && std::is_same< C, ImfC >::value,
+ "Explicit specialization of getStorageIndex is not allowed."
+ );
+ (void)s;
+ (void)P;
+ return map_poly.evaluate( i, j );
+ }
+
+ /**
+ * Returns coordinates in the logical iteration space based on
+ * the storage index.
+ *
+ * @param[in] storageIndex storage index in the physical
+ * iteration space
+ * @param[in] s current process ID
+ * @param[in] P total number of processes
+ *
+ * @return a pair of row- and column-coordinates in the
+ * logical iteration space.
+ */
+ std::pair< size_t, size_t > getCoords( const size_t storageIndex, const size_t s, const size_t P ) const;
+
+ }; // class AMF
+
+ /**
+ * Collects AMF factory classes.
+ */
+ template< enum Backend backend >
+ struct AMFFactory {
+
+ /**
+ * @brief Transforms the provided AMF by applying the gather view
+ * represented by the given row and column IMFs
+ *
+ * Exposes the type of the resulting AMF and implements a factory
+ * method that creates objects of such type.
+			 * The IMFs and the AMF may be fused (simplified), depending on
+ * the properties of the IMFs. For example, static IMFs (e.g. Id,
+ * Strided) are easily fused into the mapping polynomial.
+ *
+ * Fusion of the IMFs into the mapping polynomial results in a
+ * reduced amount of function calls and, potentially, less computation,
+ * on each call to the map function of the AMF. This is especially
+ * beneficial for longer chains of views.
+ *
+ * Creation of the new AMF is done in following order:
+ * - view row IMF and target row IMF are composed
+ * - view col IMF and target col IMF are composed
+ * - composed row IMF is fused into the target Poly, if possible,
+ * yielding the new intermediate polynomial
+ * - composed col IMF is fused, if possible, into the intermediate
+ * polynomial, obtained above. This results in the final fused
+ * polynomial.
+ *
+ * @tparam view The enum value of the desired view type.
+ * @tparam SourceAMF The type of the target AMF
+ *
+ */
+ template< typename ViewImfR, typename ViewImfC, typename SourceAMF >
+ struct Compose {
+
+ private:
+
+ /** Extract target IMF and polynomial types */
+ typedef typename SourceAMF::imf_r_type SourceImfR;
+ typedef typename SourceAMF::imf_c_type SourceImfC;
+ typedef typename SourceAMF::mapping_polynomial_type SourcePoly;
+
+ /** Compose row and column IMFs */
+ typedef typename imf::ComposedFactory< SourceImfR, ViewImfR >::type composed_imf_r_type;
+ typedef typename imf::ComposedFactory< SourceImfC, ViewImfC >::type composed_imf_c_type;
+
+ /** Fuse composed row IMF into the target polynomial */
+ typedef typename polynomials::fuse_on_i<
+ composed_imf_r_type,
+ SourcePoly
+ > fused_row;
+
+ /** Fuse composed column IMF into the intermediate polynomial obtained above */
+ typedef typename polynomials::fuse_on_j<
+ composed_imf_c_type,
+ typename fused_row::resulting_polynomial_type
+ > fused_row_col;
+
+ typedef typename fused_row::resulting_imf_type final_imf_r_type;
+ typedef typename fused_row_col::resulting_imf_type final_imf_c_type;
+ typedef typename fused_row_col::resulting_polynomial_type final_polynomial_type;
+
+ public:
+
+ typedef AMF< final_imf_r_type, final_imf_c_type, final_polynomial_type, backend > amf_type;
+
+ static amf_type Create( ViewImfR imf_r, ViewImfC imf_c, const AMF< SourceImfR, SourceImfC, SourcePoly, backend > &amf ) {
+ composed_imf_r_type composed_imf_r = imf::ComposedFactory< SourceImfR, ViewImfR >::create( amf.imf_r, imf_r );
+ composed_imf_c_type composed_imf_c = imf::ComposedFactory< SourceImfC, ViewImfC >::create( amf.imf_c, imf_c );
+ return amf_type(
+ fused_row::CreateImf( composed_imf_r ),
+ fused_row_col::CreateImf( composed_imf_c ),
+ fused_row_col::CreatePolynomial(
+ composed_imf_c,
+ fused_row::CreatePolynomial( composed_imf_r, amf.map_poly )
+ ),
+ amf.storage_dimensions
+ );
+ }
+
+ Compose() = delete;
+
+ }; // class Compose
+
+ /**
+ * @brief Describes an AMF for a container that requires allocation
+ * and exposes the AMFs type and a factory method to create it.
+ *
+ * A container that requires allocation is accompanied by Id IMFs for
+ * both row and column dimensions and the provided mapping polynomial.
+ *
+ * @tparam PolyType Type of the mapping polynomial.
+ *
+ */
+ template< typename Structure, typename ImfR, typename ImfC >
+ struct FromPolynomial {
+
+ // Ensure compatibility of IMF types.
+ // Original Matrix has imf::Id as both IMFs.
+ // Original Vector has ImfR = imf::Id and ImfC = imf::Zero.
+ static_assert(
+ std::is_same< ImfR, imf::Id >::value &&
+ ( std::is_same< ImfC, imf::Id >::value || std::is_same< ImfC, imf::Zero >::value ),
+ "AMF factory FromPolynomial can only be used for an original container."
+ );
+
+ typedef typename internal::determine_poly_factory< Structure, ImfR, ImfC, backend >::factory_type PolyFactory;
+
+ typedef AMF< imf::Id, imf::Id, typename PolyFactory::poly_type, backend > amf_type;
+
+ /**
+ * Factory method used by 2D containers.
+ *
+			 * @param[in] imf_r Row IMF
+			 * @param[in] imf_c Column IMF
+			 *
+			 * The mapping polynomial and storage size are derived via PolyFactory.
+ *
+ * @return An AMF object of the type \a amf_type
+ *
+ */
+ static amf_type Create( imf::Id imf_r, imf::Id imf_c ) {
+ return amf_type( imf_r, imf_c, PolyFactory::Create( imf_r.n, imf_c.n ), PolyFactory::GetStorageDimensions( imf_r.n, imf_c.n ) );
+ }
+
+ /**
+ * Factory method used by 1D containers.
+ *
+ * Exploits the fact that fusion of strided IMFs into the polynomial
+ * always succeeds and results in Id IMFs. As a result, the
+ * constructed AMF is of the type \a amf_type.
+ *
+			 * @param[in] imf_r Row IMF
+			 * @param[in] imf_c Column IMF
+			 *
+			 * The mapping polynomial and storage size are derived via PolyFactory.
+ *
+ * @return An AMF object of the type \a amf_type
+ *
+ * \note \internal To exploit existing mechanism for IMF fusion
+ * into the polynomial, this method creates a
+ * dummy AMF out of two Id IMFs and the provided
+ * polynomial and composes the provided Strided
+ * IMFs with the dummy AMF.
+ */
+ static amf_type Create( imf::Id imf_r, imf::Zero imf_c ) {
+
+ /**
+ * Ensure that the assumptions do not break upon potential
+ * future changes to AMFFactory::Compose.
+ */
+ static_assert(
+ std::is_same<
+ amf_type,
+ typename Compose< imf::Id, imf::Zero, AMF< imf::Id, imf::Id, typename PolyFactory::poly_type, backend > >::amf_type
+ >::value,
+ "The factory method returns the object of different type than declared. This is a bug."
+ );
+ return Compose< imf::Id, imf::Zero, AMF< imf::Id, imf::Id, typename PolyFactory::poly_type, backend > >::Create(
+ imf_r, imf_c,
+ FromPolynomial< Structure, imf::Id, imf::Zero >::Create( imf::Id( imf_r.N ), imf::Id( imf_c.N ) )
+ );
+ }
+
+ FromPolynomial() = delete;
+
+ }; // class FromPolynomial
+
+ /**
+ * @brief Transforms the provided AMF by applying the provided View type.
+ *
+ * Exposes the type of the resulting AMF and implements a factory
+ * method that creates objects of such type.
+ *
+ * @tparam view The enum value of the desired view type.
+ * @tparam SourceAMF The type of the target AMF
+ *
+ */
+ template< enum view::Views view, typename SourceAMF >
+ struct Reshape {
+
+ typedef SourceAMF amf_type;
+
+ static amf_type Create( const SourceAMF &amf ) {
+ throw std::invalid_argument( "Not implemented for the provided view type." );
+ return amf;
+ }
+
+ Reshape() = delete;
+
+ }; // class Reshape
+
+ template< typename SourceAMF >
+ struct Reshape< view::original, SourceAMF > {
+
+ typedef SourceAMF amf_type;
+
+ static amf_type Create( const SourceAMF &amf ) {
+ return amf_type( amf.imf_r, amf.imf_c, amf.map_poly, amf.storage_dimensions );
+ }
+
+ Reshape() = delete;
+
+ }; // class Reshape< original, ... >
+
+ template< typename SourceAMF >
+ struct Reshape< view::transpose, SourceAMF > {
+
+ typedef AMF<
+ typename SourceAMF::imf_c_type,
+ typename SourceAMF::imf_r_type,
+ typename polynomials::apply_view<
+ view::transpose,
+ typename SourceAMF::mapping_polynomial_type
+ >::type,
+ backend
+ > amf_type;
+
+ static amf_type Create( const SourceAMF &amf ) {
+ typedef typename polynomials::apply_view< view::transpose, typename SourceAMF::mapping_polynomial_type >::type new_mapping_polynomial_type;
+ return AMF<
+ typename SourceAMF::imf_c_type,
+ typename SourceAMF::imf_r_type,
+ new_mapping_polynomial_type,
+ backend
+ >(
+ amf.imf_c,
+ amf.imf_r,
+ new_mapping_polynomial_type(
+ amf.map_poly.ay2, amf.map_poly.ax2, amf.map_poly.axy,
+ amf.map_poly.ay, amf.map_poly.ax,
+ amf.map_poly.a0
+ ),
+ amf.storage_dimensions
+ );
+ }
+
+ Reshape() = delete;
+
+ }; // class Reshape< transpose, ... >
+
+ /**
+ * Specialization for diagonal views
+ *
+ * Diagonal view is implemented by taking a square view over the matrix.
+ *
+ * \note \internal Converts a mapping polynomial from a bivariate-quadratic
+ * to univariate quadratic by summing j-factors into
+ * corresponding i-factors.
+		 *                  Implicitly applies a largest possible square view by
+ * using Strided IMFs.
+ *
+ */
+ template< typename SourceAMF >
+ struct Reshape< view::diagonal, SourceAMF > {
+
+ private:
+
+ /** Short name of the original mapping polynomial type */
+ typedef typename SourceAMF::mapping_polynomial_type orig_p;
+
+ /** The type of the resulting polynomial */
+ typedef polynomials::BivariateQuadratic<
+ orig_p::Ax2 || orig_p::Ay2 || orig_p::Axy, 0, 0,
+ orig_p::Ax || orig_p::Ay, 0,
+ orig_p::A0, orig_p::D
+ > new_poly_type;
+
+ public:
+
+ typedef AMF< imf::Id, imf::Zero, new_poly_type, backend > amf_type;
+
+ static amf_type Create( const SourceAMF &amf ) {
+ assert( amf.getLogicalDimensions().first == amf.getLogicalDimensions().second );
+ return amf_type(
+ imf::Id( amf.getLogicalDimensions().first ),
+ imf::Zero( 1 ),
+ new_poly_type(
+ orig_p::Ax2 * amf.map_poly.ax2 + orig_p::Ay2 * amf.map_poly.ay2 + orig_p::Axy * amf.map_poly.axy, 0, 0,
+ orig_p::Ax * amf.map_poly.ax + orig_p::Ay * amf.map_poly.ay, 0,
+ amf.map_poly.a0
+ ),
+ amf.storage_dimensions
+ );
+ }
+
+ Reshape() = delete;
+
+ }; // class Reshape< diagonal, ... >
+
+ /**
+ * Specialization for matrix views over vectors
+ *
+ * \note \internal The resulting AMF is equivalent to applying
+ * a composition with two ID IMFs.
+ *
+ */
+ template< typename SourceAMF >
+ struct Reshape< view::matrix, SourceAMF > {
+
+ typedef typename Compose< imf::Id, imf::Id, SourceAMF >::amf_type amf_type;
+
+ static amf_type Create( const SourceAMF &amf ) {
+ return Compose< imf::Id, imf::Id, SourceAMF >::Create(
+ imf::Id( amf.getLogicalDimensions().first ),
+ imf::Id( amf.getLogicalDimensions().second ),
+ amf
+ );
+ }
+
+ Reshape() = delete;
+
+			}; // class Reshape< matrix, ... >
+
+ }; // class AMFFactory
+
+ }; // namespace storage
+
+ namespace internal {
+
+ /**
+ * Determines the AMF type for a matrix having the provided static properties.
+ *
+ * For a matrix that requires allocation, the new AMF consists of two Id IMFs
+ * and the pre-defined mapping polynomial.
+ * For a view over another matrix, the new AMF is created from the AMF of the
+ * target matrix in one of the following ways:
+ * - When applying gather view using IMFs, the IMFs are applied to the AMF of
+ * the target matrix.
+ * - When applying a different view type (e.g. transpose or diagonal), the AMF
+ * of the target matrix is transformed according to the provided view type.
+ *
+ * @tparam View View type
+ * @tparam ImfR Row IMF type
+ * @tparam ImfC Column IMF type
+ * @tparam backend The backend
+ *
+ * The valid combinations of the input parameters are as follows:
+ * - original view on void with Id IMFs.
+ * - original view on ALP matrix with any type of IMFs
+ * - other type of views (e.g. transposed, diagonal) with only Id IMFs.
+ * Invocation using incompatible parameters may result in an undefined behavior.
+ * The first parameter combination is handled by a specialization of this trait.
+ *
+ */
+ template<
+ typename Structure, typename View, typename ImfR, typename ImfC,
+ enum Backend backend
+ >
+ struct determine_amf_type {
+
+ /** Ensure that the view is not on a void type */
+ static_assert(
+ !std::is_same< typename View::applied_to, void >::value,
+ "Cannot handle views over void type by this determine_amf_type specialization."
+ );
+
+ /** Ensure that if the view is original, the IMFs are Id */
+ static_assert(
+ View::type_id != view::original ||
+ ( View::type_id == view::original && std::is_same< imf::Id, ImfR >::value && std::is_same< imf::Id, ImfC >::value ),
+ "Original view with non-ID Index Mapping Functions is not supported."
+ );
+
+ /** Ensure that if the view is transposed, the IMFs are Id */
+ static_assert(
+ View::type_id != view::transpose ||
+ ( View::type_id == view::transpose && std::is_same< imf::Id, ImfR >::value && std::is_same< imf::Id, ImfC >::value ),
+ "Transposed view with non-ID Index Mapping Functions is not supported."
+ );
+
+ /** Ensure that if the view is diagonal, the row and column IMFs are Id and Zero, respectively */
+ static_assert(
+ View::type_id != view::diagonal ||
+ ( View::type_id == view::diagonal && std::is_same< imf::Id, ImfR >::value && std::is_same< imf::Zero, ImfC >::value ),
+ "Diagonal view with non-Id Row and non-Zero Column Index Mapping Functions is not supported."
+ );
+
+ typedef typename std::conditional<
+ View::type_id == view::gather,
+ typename storage::AMFFactory< backend >::template Compose<
+ ImfR, ImfC, typename View::applied_to::amf_type
+ >::amf_type,
+ typename storage::AMFFactory< backend >::template Reshape<
+ View::type_id,
+ typename View::applied_to::amf_type
+ >::amf_type
+ >::type type;
+
+ };
+
+ /** Specialization for storage-based containers that allocate storage */
+ template< typename Structure, typename ImfC, enum Backend backend >
+ struct determine_amf_type< Structure, view::Original< void >, imf::Id, ImfC, backend > {
+
+ static_assert(
+ std::is_same< ImfC, imf::Id >::value || std::is_same< ImfC, imf::Zero >::value,
+ "Incompatible combination of parameters provided to determine_amf_type."
+ );
+
+ typedef typename storage::AMFFactory< backend >::template FromPolynomial<
+ Structure, imf::Id, ImfC
+ >::amf_type type;
+ };
+
+ /** Specialization for functor-based containers that allocate storage */
+ template< typename Structure, typename ImfC, enum Backend backend, typename Lambda >
+ struct determine_amf_type< Structure, view::Functor< Lambda >, imf::Id, ImfC, backend > {
+
+ static_assert(
+ std::is_same< ImfC, imf::Id >::value || std::is_same< ImfC, imf::Zero >::value,
+ "Incompatible combination of parameters provided to determine_amf_type."
+ );
+
+ // A functor-based container does not have an AMF
+ typedef void type;
+ };
+
+ } // namespace internal
+
+} // namespace alp
+
+#endif // _H_ALP_AMF_BASED_STORAGE
diff --git a/include/alp/amf-based/storagebasedmatrix.hpp b/include/alp/amf-based/storagebasedmatrix.hpp
new file mode 100644
index 000000000..ec2c308d2
--- /dev/null
+++ b/include/alp/amf-based/storagebasedmatrix.hpp
@@ -0,0 +1,314 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _H_ALP_AMF_BASED_STORAGEBASEDMATRIX
+#define _H_ALP_AMF_BASED_STORAGEBASEDMATRIX
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include "storage.hpp"
+
+/**
+ * @todo This should not happen given the hierarchy of concepts. Fix by splitting
+ * \a internal::Vector from the various vector.hpp
+ */
+
+#if defined( _ALP_WITH_REFERENCE ) || defined( _ALP_OMP_WITH_REFERENCE )
+ #include
+#endif
+#if defined( _ALP_WITH_DISPATCH ) || defined( _ALP_OMP_WITH_DISPATCH )
+ #include
+#endif
+
+
+namespace alp {
+
+ namespace internal {
+
+ /** Forward declaration */
+ template< typename DerivedMatrix >
+ class MatrixBase;
+
+ template< typename DerivedMatrix >
+ std::pair< size_t, size_t > dims( const MatrixBase< DerivedMatrix > &A ) noexcept;
+
+ template<
+ typename MatrixType,
+ std::enable_if_t< internal::is_storage_based< MatrixType >::value > * = nullptr
+ >
+ size_t getStorageDimensions( const MatrixType &A ) noexcept;
+
+ template< typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType>::value > * = nullptr
+ >
+ bool getInitialized( const MatrixType &A ) noexcept;
+
+ template< typename MatrixType,
+ std::enable_if_t< is_matrix< MatrixType>::value > * = nullptr
+ >
+ void setInitialized( MatrixType &, const bool ) noexcept;
+
+ /** Forward declaration */
+ template< typename T, typename AmfType, bool requires_allocation, Backend backend >
+ class StorageBasedMatrix;
+
+ /** Container reference getters used by friend functions of specialized Matrix */
+ template< typename T, typename AmfType, bool requires_allocation, Backend backend >
+ const Vector< T, backend > & getContainer( const StorageBasedMatrix< T, AmfType, requires_allocation, backend > & A );
+
+ template< typename T, typename AmfType, bool requires_allocation, Backend backend >
+ Vector< T, backend > & getContainer( StorageBasedMatrix< T, AmfType, requires_allocation, backend > & A );
+
+ /** Container reference getters. Defer the call to base class friend function */
+ template<
+ typename T, typename Structure, enum Density density, typename View,
+ typename ImfR, typename ImfC,
+ Backend backend
+ >
+ const Vector< T, backend > & getContainer( const alp::Matrix< T, Structure, density, View, ImfR, ImfC, backend > & A ) {
+ return getContainer( static_cast<
+ const StorageBasedMatrix<
+ T,
+ typename alp::Matrix< T, Structure, density, View, ImfR, ImfC, backend >::amf_type,
+ alp::Matrix< T, Structure, density, View, ImfR, ImfC, backend >::requires_allocation,
+ backend
+ > &
+ >( A ) );
+ }
+
+ template<
+ typename T, typename Structure, enum Density density, typename View,
+ typename ImfR, typename ImfC,
+ Backend backend
+ >
+ Vector< T, backend > & getContainer( alp::Matrix< T, Structure, density, View, ImfR, ImfC, backend > & A ) {
+ return getContainer( static_cast<
+ StorageBasedMatrix<
+ T,
+ typename alp::Matrix< T, Structure, density, View, ImfR, ImfC, backend >::amf_type,
+ alp::Matrix< T, Structure, density, View, ImfR, ImfC, backend >::requires_allocation,
+ backend
+ > &
+ >( A ) );
+ }
+
+		/** Returns the reference to the AMF of a storage-based matrix */
+		template<
+			typename MatrixType,
+			std::enable_if_t< internal::is_storage_based< MatrixType >::value > * = nullptr
+		>
+		const typename MatrixType::amf_type &getAmf( const MatrixType &A ) noexcept;
+
+ /**
+ * Matrix container specialization
+ * Implements both original containers and views on containers.
+ * @tparam requires_allocation True if the class is an original container
+ * False if the class is a view of another matrix
+ */
+ template< typename T, typename AmfType, bool requires_allocation, Backend backend >
+ class StorageBasedMatrix : public MatrixBase< StorageBasedMatrix< T, AmfType, requires_allocation, backend > > {
+
+ template<
+ typename MatrixType,
+ std::enable_if_t< internal::is_storage_based< MatrixType >::value > *
+ >
+ friend size_t getStorageDimensions( const MatrixType &A ) noexcept;
+
+			/** Get the reference to the AMF of a storage-based matrix */
+			template<
+				typename MatrixType,
+				std::enable_if_t< internal::is_storage_based< MatrixType >::value > *
+			>
+			friend const typename MatrixType::amf_type &getAmf( const MatrixType &A ) noexcept;
+
+ public:
+
+ /** Expose static properties */
+
+ typedef T value_type;
+ typedef AmfType amf_type;
+ typedef typename AmfType::imf_r_type imf_r_type;
+ typedef typename AmfType::imf_c_type imf_c_type;
+ /** Type returned by access function */
+ typedef T &access_type;
+ typedef const T &const_access_type;
+ /** Type of the index used to access the physical storage */
+ typedef size_t storage_index_type;
+
+ protected:
+ typedef StorageBasedMatrix< T, AmfType, requires_allocation, backend > self_type;
+ friend MatrixBase< self_type >;
+
+ typedef typename std::conditional<
+ requires_allocation,
+ Vector< T, backend >,
+ Vector< T, backend > &
+ >::type container_type;
+
+ /** A container-type view is characterized by its association with a physical container */
+ container_type container;
+
+ /**
+ * All matrix views use a pair of index mapping functions to
+ * capture the correspondence between their logical layout and the one
+ * of their underlying container. This may be another view leading to a composition
+ * of IMFs between the top matrix view and the physical container.
+ * Original matrix containers's index mapping functions are an identity mapping.
+ */
+ //ImfR imf_r;
+ //ImfC imf_c;
+
+ /**
+			 * The container's storage scheme. \a storage_scheme is not exposed to the user as an option
+			 * but can be defined by ALP at different points in the execution depending on the \a backend choice.
+			 * In particular, if the structured matrix is not a temporary matrix then it is fixed at construction
+			 * time when the allocation takes place.
+			 * If the structured matrix is a temporary one then a storage scheme choice may or may not be
+			 * made depending on whether a decision about instantiating the matrix is made by the framework.
+ *
+ * The specific storage scheme choice depends on the chosen backend and the structure of the matrix.
+ * \internal \todo Revisit this when introducing storage mapping functions.
+ */
+ //Smf smf;
+
+ /**
+ * Access mapping function maps a pair of logical coordinates
+ * into the concrete coordinate inside the actual container.
+ * \see AMF
+ */
+ AmfType amf;
+ /**
+ * @brief determines the size of the matrix via the domain of
+ * the index mapping functions.
+ *
+ * @return A pair of dimensions.
+ */
+ std::pair< size_t, size_t > dims() const noexcept {
+ return amf.getLogicalDimensions();
+ }
+
+ size_t getStorageDimensions() const noexcept {
+ return amf.getStorageDimensions();
+ }
+
+ friend const Vector< T, backend > & getContainer( const self_type &A ) {
+ return A.container;
+ }
+
+ friend Vector< T, backend > & getContainer( self_type &A ) {
+ return A.container;
+ }
+
+ bool getInitialized() const noexcept {
+ return internal::getInitialized( container );
+ }
+
+ void setInitialized( const bool initialized ) noexcept {
+ internal::setInitialized( container , initialized );
+ }
+
+ const AmfType &getAmf() const noexcept {
+ return amf;
+ }
+
+ /**
+ * Returns a constant reference to the element corresponding to
+ * the provided storage index.
+ *
+ * @param storageIndex storage index in the physical iteration
+ * space.
+ *
+ * @return const reference or value of the element at given position.
+ */
+ const_access_type access( const storage_index_type &storageIndex ) const {
+ return container[ storageIndex ];
+ }
+
+ access_type access( const storage_index_type &storageIndex ) {
+ return container[ storageIndex ];
+ }
+
+ storage_index_type getStorageIndex( const size_t i, const size_t j, const size_t s, const size_t P ) const {
+ return amf.getStorageIndex( i, j, s, P );
+ }
+
+ /**
+ * @brief Construct a new structured matrix Base object assigning identity
+ * mapping functions both to the row and column dimensions.
+ *
+ * @param rows The number of rows of the matrix.
+ * @param cols The number of columns of the matrix.
+ * @param smf The storage mapping function assigned to this matrix.
+ */
+			/** (Documentation of old MatrixContainer. TODO: merge with the above docs.)
+ * @brief Construct a new structured matrix container object.
+ *
+ * \warning \a cap is present for compatibility with other matrix specializations.
+ * In reference backend, the number of non-zeros (i.e. capacity)
+ * depends on the used storage scheme. Therefore, this parameter is
+ * ignored.
+ *
+ * TODO: Add the storage scheme a parameter to the constructor
+ * so that allocation can be made accordingly, generalizing the full case.
+ */
+ StorageBasedMatrix( AmfType &&amf ) :
+ // enable only if ImfR and ImfC are imf::Id
+ container( internal::Vector< T, backend >( amf.getStorageDimensions() ) ),
+ amf( std::move( amf ) ) {}
+
+ /** View on another container */
+ StorageBasedMatrix( Vector< T, backend > &container, AmfType &&amf ) :
+ container( container ),
+ amf( std::move( amf ) ) {}
+
+ /** View on another raw container */
+ StorageBasedMatrix( T *buffer, const size_t buffer_size, AmfType &&amf ) :
+ container( buffer, buffer_size ),
+ amf( std::move( amf ) ) {}
+
+ }; // class StorageBasedMatrix
+
+
+		/** Get the reference to the AMF of a storage-based matrix */
+		template<
+			typename MatrixType,
+			std::enable_if_t< internal::is_storage_based< MatrixType >::value > *
+		>
+		const typename MatrixType::amf_type &getAmf( const MatrixType &A ) noexcept {
+			return A.getAmf();
+		}
+
+ } // namespace internal
+
+	template<
+		typename MatrixType,
+		std::enable_if_t< internal::is_storage_based< MatrixType >::value > *
+	>
+	size_t internal::getStorageDimensions( const MatrixType &A ) noexcept {
+		static_assert( is_storage_based< MatrixType >::value, "getStorageDimensions supported only for storage-based containers.");
+		return static_cast< const typename MatrixType::base_type & >( A ).getStorageDimensions();
+	}
+
+} // namespace alp
+
+#endif // end ``_H_ALP_AMF_BASED_STORAGEBASEDMATRIX''
diff --git a/include/alp/amf-based/vector.hpp b/include/alp/amf-based/vector.hpp
new file mode 100644
index 000000000..0a6c2c972
--- /dev/null
+++ b/include/alp/amf-based/vector.hpp
@@ -0,0 +1,729 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 14th of January 2022
+ */
+
+#ifndef _H_ALP_AMF_BASED_VECTOR
+#define _H_ALP_AMF_BASED_VECTOR
+
+
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "matrix.hpp"
+#include "storage.hpp"
+
+
+namespace alp {
+
+ namespace internal {
+
+ template< typename T, typename Structure, typename View, typename ImfR, typename ImfC, enum Backend backend >
+ size_t getLength( const alp::Vector< T, Structure, Density::Dense, View, ImfR, ImfC, backend > &v ) noexcept {
+ return v._length();
+ }
+
+ template< typename T, typename Structure, typename View, typename ImfR, typename ImfC, enum Backend backend >
+ bool getInitialized( const alp::Vector< T, Structure, Density::Dense, View, ImfR, ImfC, backend > &v ) noexcept {
+ return getInitialized( static_cast< const typename alp::Vector< T, Structure, Density::Dense, View, ImfR, ImfC, backend >::base_type & >( v ) );
+ }
+
+ template<
+ typename T, typename Structure, typename View,
+ typename ImfR, typename ImfC, enum Backend backend
+ >
+ void setInitialized(
+ alp::Vector< T, Structure, Density::Dense, View, ImfR, ImfC, backend > &v, bool initialized
+ ) noexcept {
+ setInitialized( static_cast< typename alp::Vector< T, Structure, Density::Dense, View, ImfR, ImfC, backend >::base_type &>( v ), initialized );
+ }
+
+ template<
+ typename T, typename Structure, typename View,
+ typename ImfR, typename ImfC, enum Backend backend
+ >
+ typename alp::Vector<
+ T, Structure, Density::Dense, View, ImfR, ImfC, backend
+ >::iterator
+ begin(
+ alp::Vector< T, Structure, Density::Dense, View, ImfR, ImfC, backend > &v
+ ) noexcept;
+
+ template<
+ typename T, typename Structure, typename View,
+ typename ImfR, typename ImfC, enum Backend backend
+ >
+ typename alp::Vector<
+ T, Structure, Density::Dense, View, ImfR, ImfC, backend
+ >::iterator
+ end(
+ alp::Vector< T, Structure, Density::Dense, View, ImfR, ImfC, backend > &v
+ ) noexcept;
+
+ } // end namespace ``alp::internal''
+
+ /**
+ * \brief An ALP vector view.
+ *
+ * This is an opaque data type for vector views.
+ *
+ * A vector exposes a mathematical \em logical layout which allows to
+ * express implementation-oblivious concepts such as \em views on the vector.
+ * The logical layout of a vector view maps to a physical counterpart via
+ * a storage scheme which typically depends on the selected backend.
+ * alp::Vector may be used as an interface to such a physical layout.
+ *
+ * Views can be used to create logical \em perspectives on top of a container.
+ * For example, one may decide to refer to the part of the vector or
+ * to reference a diagonal of a matrix as a vector.
+ * See specialization \a Vector< T, Structure, Density::Dense, view::Diagonal< MatrixT >, backend >
+ * as an example of such usage.
+ *
+ * Vector View defined as views on other vectors do not instantiate a
+ * new container but refer to the one used by their targets.
+ *
+ * @tparam T type.
+ * @tparam Structure Structure introduced to match the template
+ * parameter list of \a Matrix
+ * @tparam View One of the vector views.
+ * All static views except for \a view::Original (via
+ * \a view::Original cannot instantiate a new container
+ * and only allow to refer to a previously defined
+ * \a Vector.
+ * The \a View parameter should not be used directly
+ * by the user but can be set using specific member types
+ * appropriately defined by each Vector and
+ * accessible via functions.
+ *
+ */
+ template<
+ typename T, typename Structure, typename View,
+ typename ImfR, typename ImfC, enum Backend backend
+ >
+ class Vector< T, Structure, Density::Dense, View, ImfR, ImfC, backend > { };
+
+ /*
+ * ALP vector with a general structure
+ */
+ template<
+ typename T, typename View,
+ typename ImfR, typename ImfC, enum Backend backend
+ >
+ class Vector<
+ T, structures::General, Density::Dense, View, ImfR, ImfC, backend
+ > : public Matrix< T, structures::General, Density::Dense, View, ImfR, ImfC, backend > {
+
+ public:
+
+ typedef Vector<
+ T, structures::General, Density::Dense,
+ View, ImfR, ImfC, backend
+ > self_type;
+
+ typedef Matrix<
+ T, structures::General, Density::Dense,
+ View, ImfR, ImfC, backend
+ > base_type;
+
+ private:
+ class VectorIterator:
+ public std::iterator< std::random_access_iterator_tag, T > {
+
+ friend class Vector<
+ T, structures::General, Density::Dense,
+ View, ImfR, ImfC, backend
+ >;
+
+ private:
+
+ typedef typename self_type::storage_index_type index_type;
+ typedef std::iterator<
+ std::random_access_iterator_tag, T
+ > std_base_class;
+
+ self_type *vec;
+ index_type position;
+
+ VectorIterator( self_type *vptr ) noexcept :
+ vec( vptr ), position( 0 )
+ {}
+
+ VectorIterator( self_type *vptr, index_type pos ) noexcept :
+ vec( vptr ), position( pos )
+ {}
+
+ bool equal( const VectorIterator &other ) const noexcept {
+ return ( vec == other.vec ) && ( position == other.position );
+ }
+
+ bool lessThen( const VectorIterator &other ) const noexcept {
+ return ( vec == other.vec ) && ( position < other.position );
+ }
+
+ public:
+ typedef typename std_base_class::pointer pointer;
+ typedef typename std_base_class::reference reference;
+ typedef typename std_base_class::difference_type difference_type;
+
+ /** Default constructor. */
+ VectorIterator() noexcept :
+ vec( nullptr ), position( 0 )
+ {}
+
+ /** Copy constructor. */
+ VectorIterator( const VectorIterator &other ) noexcept :
+ vec( other.vec ),
+ position( other.position )
+ {}
+
+ /** Move constructor. */
+ VectorIterator( VectorIterator &&other ) :
+ vec( nullptr ), position( 0 )
+ {
+ std::swap( vec, other.vec );
+ std::swap( position, other.position );
+ }
+
+ /** Copy assignment. */
+ VectorIterator& operator=( const VectorIterator &other ) noexcept {
+ vec = other.vec;
+ position = other.position;
+ return *this;
+ }
+
+ /** Move assignment. */
+ VectorIterator& operator=( VectorIterator &&other ) {
+ vec = nullptr;
+ position = 0;
+ std::swap( vec, other.vec );
+ std::swap( position, other.position );
+ return *this;
+ }
+
+ reference operator*() const {
+ return ( *vec )[ position ];
+ }
+
+ VectorIterator& operator++() {
+ ++position;
+ return *this;
+ }
+
+ VectorIterator& operator--() {
+ --position;
+ return *this;
+ }
+
+ VectorIterator operator++( int ) {
+ return VectorIterator( vec, position++ );
+ }
+
+ VectorIterator operator--( int ) {
+ return VectorIterator( vec, position-- );
+ }
+
+ VectorIterator operator+( const difference_type &n ) const {
+ return VectorIterator( vec, ( position + n ) );
+ }
+
+ VectorIterator& operator+=( const difference_type &n ) {
+ position += n;
+ return *this;
+ }
+
+ VectorIterator operator-( const difference_type &n ) const {
+ return VectorIterator( vec, ( position - n ) );
+ }
+
+ VectorIterator& operator-=( const difference_type &n ) {
+ position -= n;
+ return *this;
+ }
+
+ reference operator[]( const difference_type &n ) const {
+ return ( *vec )[ position + n ];
+ }
+
+ bool operator==( const VectorIterator &other ) const {
+ return equal( other );
+ }
+
+ bool operator!=( const VectorIterator &other ) const {
+ return !equal( other );
+ }
+
+ bool operator<( const VectorIterator &other ) const {
+ return lessThen( other );
+ }
+
+ bool operator>( const VectorIterator &other ) const {
+ return !( lessThen( other ) || equal( other ) );
+ }
+
+ bool operator<=( const VectorIterator &other ) const {
+ return lessThen( other ) || equal( other );
+ }
+
+ bool operator>=( const VectorIterator &other ) const {
+ return !lessThen( other );
+ }
+
+ difference_type operator+( const VectorIterator &other ) const {
+ assert( other.vec == vec );
+ return position + other.position;
+ }
+
+ difference_type operator-( const VectorIterator &other ) const {
+ assert( other.vec == vec );
+ return position - other.position;
+ }
+ };
+
+ /*********************
+ Storage info friends
+ ******************** */
+
+ friend size_t internal::getLength<>( const Vector< T, structures::General, Density::Dense, View, ImfR, ImfC, backend > &v ) noexcept;
+
+ /** Returns the length of the vector */
+ size_t _length() const {
+ return nrows( static_cast< const base_type & >( *this ) );
+ }
+
+ VectorIterator begin() noexcept {
+ return VectorIterator( this );
+ }
+
+ VectorIterator end() noexcept {
+ return VectorIterator( this, _length() );
+ }
+
+
+ public:
+
+ typedef VectorIterator iterator;
+
+ friend iterator internal::begin<>( self_type &v ) noexcept;
+ friend iterator internal::end<>( self_type &v ) noexcept;
+
+ /** @see Vector::value_type. */
+ using value_type = T;
+
+ typedef structures::General structure;
+
+ /** @see Vector::lambda_reference */
+ typedef typename std::conditional<
+ internal::is_storage_based< self_type >::value,
+ T &,
+ T
+ >::type lambda_reference;
+ typedef typename std::conditional<
+ internal::is_storage_based< self_type >::value,
+ const T &,
+ const T
+ >::type const_lambda_reference;
+
+ template < view::Views view_tag, bool d=false >
+ struct view_type;
+
+ template < bool d >
+ struct view_type< view::original, d > {
+ typedef Vector< T, structures::General, Density::Dense, view::Original< self_type >, imf::Id, imf::Id, backend > type;
+ };
+
+ template < bool d >
+ struct view_type< view::gather, d > {
+ typedef Vector< T, structures::General, Density::Dense, view::Gather< self_type >, imf::Strided, imf::Id, backend > type;
+ };
+
+ template < bool d >
+ struct view_type< view::matrix, d > {
+ typedef Matrix< T, structures::General, Density::Dense, view::Matrix< self_type >, imf::Id, imf::Id, backend > type;
+ };
+
+ /**
+ * Constructor for a storage-based vector that allocates storage.
+ */
+ Vector( const size_t length, const size_t cap = 0 ) :
+ base_type( length, 1, cap ) {
+ static_assert(
+ internal::is_view_over_storage< View >::value &&
+ internal::requires_allocation< View >::value,
+ "This constructor can only be used in storage-based allocation-requiring Vector specializations."
+ );
+ }
+
+ /**
+ * Constructor for a view over another storage-based vector.
+ *
+ * @tparam SourceType The type of the target vector.
+ *
+ */
+ template<
+ typename SourceType,
+ std::enable_if_t<
+ std::is_same< SourceType, typename View::applied_to >::value &&
+ internal::is_view_over_storage< View >::value &&
+ !internal::requires_allocation< View >::value
+ > * = nullptr
+ >
+ Vector( SourceType &source_vector, ImfR imf_r, ImfC imf_c ) :
+ base_type( source_vector, imf_r, imf_c ) { }
+
+ /**
+ * Constructor for a view over another vector applying a view defined
+ * by View template parameter of the constructed vector.
+ *
+ * @tparam SourceType The type of the target vector.
+ *
+ */
+ template<
+ typename SourceType,
+ std::enable_if_t<
+ std::is_same< SourceType, typename View::applied_to >::value &&
+ internal::is_view_over_storage< View >::value &&
+ !internal::requires_allocation< View >::value
+ > * = nullptr
+ >
+ Vector( SourceType &source_vector ) :
+ base_type( source_vector ) {}
+
+ /**
+ * @deprecated
+ * Constructor for a view over another storage-based vector.
+ *
+ * @tparam SourceType The type of the target vector.
+ *
+ */
+ template<
+ typename SourceType,
+ typename AmfType,
+ std::enable_if_t<
+ std::is_same< SourceType, typename View::applied_to >::value &&
+ internal::is_view_over_storage< View >::value &&
+ !internal::requires_allocation< View >::value
+ > * = nullptr
+ >
+ Vector( SourceType &source_vector, AmfType &&amf ) :
+ base_type( source_vector, std::forward< AmfType >( amf ) ) {}
+
+ /**
+ * Constructor for a functor-based vector that allocates memory.
+ *
+ * @tparam LambdaType The type of the lambda function associated to the data.
+ *
+ */
+ template<
+ typename LambdaType,
+ std::enable_if_t<
+ std::is_same< LambdaType, typename View::applied_to >::value &&
+ internal::is_view_over_functor< View >::value &&
+ internal::requires_allocation< View >::value
+ > * = nullptr
+ >
+ Vector( std::function< bool() > initialized, const size_t length, LambdaType lambda ) :
+ base_type( initialized, length, 1, lambda ) {}
+
+ /**
+ * Constructor for a view over another functor-based vector.
+ *
+ * @tparam SourceType The type of the target vector.
+ *
+ */
+ template<
+ typename SourceType,
+ std::enable_if_t<
+ std::is_same< SourceType, typename View::applied_to >::value &&
+ internal::is_view_over_functor< View >::value &&
+ !internal::requires_allocation< View >::value
+ > * = nullptr
+ >
+ Vector( SourceType &target_vector, ImfR imf_r, ImfC imf_c ) :
+ base_type( getFunctor( target_vector ), imf_r, imf_c ) {}
+
+ /**
+ * Constructor for a view over another functor-based vector.
+ *
+ * @tparam SourceType The type of the target vector.
+ *
+ */
+ template<
+ typename SourceType,
+ std::enable_if_t<
+ std::is_same< SourceType, typename View::applied_to >::value &&
+ internal::is_view_over_functor< View >::value &&
+ !internal::requires_allocation< View >::value
+ > * = nullptr
+ >
+ Vector( SourceType &target_vector ) :
+ base_type( getFunctor( target_vector ),
+ imf::Id( nrows ( target_vector ) ),
+ imf::Id( 1 )
+ ) {
+
+ static_assert(
+ std::is_same< ImfR, imf::Id >::value &&
+ std::is_same< ImfC, imf::Id >::value,
+ "This constructor can only be used with Id IMFs."
+ );
+
+ }
+
+ /** \internal No implementation notes. */
+ lambda_reference operator[]( const size_t i ) noexcept {
+ assert( i < _length() );
+ //assert( getInitialized( *v ) );
+ /** \internal \todo revise the third and fourth parameter for parallel backends */
+ return this->access( this->getStorageIndex( i, 0, 0, 1 ) );
+ }
+
+ /** \internal No implementation notes. */
+ const_lambda_reference operator[]( const size_t i ) const noexcept {
+ assert( i < _length() );
+ //assert( getInitialized( *v ) );
+ /** \internal \todo revise the third and fourth parameter for parallel backends */
+ return this->access( this->getStorageIndex( i, 0, 0, 1 ) );
+ }
+
+ }; // class Vector with physical container
+
+ namespace internal {
+
+ template<
+ typename T, typename Structure, typename View,
+ typename ImfR, typename ImfC, enum Backend backend
+ >
+ typename alp::Vector<
+ T, Structure, Density::Dense, View, ImfR, ImfC, backend
+ >::iterator
+ begin(
+ alp::Vector< T, Structure, Density::Dense, View, ImfR, ImfC, backend > &v
+ ) noexcept {
+ return v.begin();
+ }
+
+ template<
+ typename T, typename Structure, typename View,
+ typename ImfR, typename ImfC, enum Backend backend
+ >
+ typename alp::Vector<
+ T, Structure, Density::Dense, View, ImfR, ImfC, backend
+ >::iterator
+ end(
+ alp::Vector< T, Structure, Density::Dense, View, ImfR, ImfC, backend > &v
+ ) noexcept {
+ return v.end();
+ }
+
+ } // end namespace ``alp::internal''
+
+ /** Identifies any backend's implementation of ALP vector as an ALP vector. */
+ template< typename T, typename Structure, typename View, typename ImfR, typename ImfC, enum Backend backend >
+ struct is_vector< Vector< T, Structure, Density::Dense, View, ImfR, ImfC, backend > > : std::true_type {};
+
+ /**
+ * @brief Generate an original view of the input Vector. The function guarantees
+ * the created view is non-overlapping with other existing views only when the
+ * check can be performed in constant time.
+ *
+ * @tparam SourceVector The type of the source ALP vector
+ *
+ * @param[in] source The ALP Vector object over which the view is created.
+ *
+ * @returns A new ALP Vector object.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This function performs
+ * \f$ \Theta(nref) \f$ amount of work where \f$ nref \f$ is the number
+ * of available views of \a source.
+ * -# A call to this function may use \f$ \mathcal{O}(1) \f$ bytes
+ * of memory beyond the memory in use at the function call entry.
+ * -# This function may make system calls.
+ * \endparblock
+ *
+ */
+ template<
+ typename SourceVector,
+ std::enable_if_t< is_vector< SourceVector >::value > * = nullptr
+ >
+ typename SourceVector::template view_type< view::original >::type
+ get_view( SourceVector &source ) {
+
+ using target_t = typename SourceVector::template view_type< view::original >::type;
+
+ return target_t( source );
+ }
+
+ /**
+ * Create a matrix view over a vector.
+ * The resulting matrix is a column matrix of size M x 1, where M is vector length.
+ * The function guarantees the created view is non-overlapping with other
+ * existing views only when the check can be performed in constant time.
+ *
+ * @tparam target_view The type of the view to apply to the vector.
+ * Only supports value view::matrix.
+ * @tparam SourceVector The type of the source ALP vector
+ *
+ * @param[in] source The ALP Vector object over which the view is created.
+ *
+ */
+ template<
+ enum view::Views target_view,
+ typename SourceVector,
+ std::enable_if_t<
+ is_vector< SourceVector >::value &&
+ target_view == view::matrix
+ > * = nullptr
+ >
+ typename SourceVector::template view_type< target_view >::type
+ get_view( SourceVector &source ) {
+ using target_t = typename SourceVector::template view_type< target_view >::type;
+ return target_t( source );
+ }
+
+ namespace internal {
+
+ /**
+ * Implement a gather through a View over compatible Structure using provided Index Mapping Functions.
+ * The compatibility depends on the TargetStructure, SourceStructure and IMFs, and is calculated during runtime.
+ */
+ template<
+ typename TargetStructure, typename TargetImfR,
+ typename SourceVector,
+ std::enable_if_t< is_vector< SourceVector >::value > * = nullptr
+ >
+ typename internal::new_container_type_from<
+ typename SourceVector::template view_type< view::gather >::type
+ >::template change_structure< TargetStructure >::_and_::
+ template change_imfr< TargetImfR >::type
+ get_view(
+ SourceVector &source,
+ TargetImfR imf_r,
+ imf::Id imf_c
+ ) {
+
+ //if( std::dynamic_pointer_cast< imf::Select >( imf_r ) || std::dynamic_pointer_cast< imf::Select >( imf_c ) ) {
+ // throw std::runtime_error("Cannot gather with imf::Select yet.");
+ //}
+ // No static check as the compatibility depends on IMF, which is a runtime level parameter
+ //if( ! (TargetStructure::template isInstantiableFrom< Structure >( static_cast< TargetImfR & >( imf_r ), static_cast< TargetImfR & >( imf_c ) ) ) ) {
+ if( ! (structures::isInstantiable< typename SourceVector::structure, TargetStructure >::check( imf_r, imf_c ) ) ) {
+ throw std::runtime_error("Cannot gather into specified TargetStructure from provided SourceStructure and Index Mapping Functions.");
+ }
+
+ using target_vec_t = typename internal::new_container_type_from<
+ typename SourceVector::template view_type< view::gather >::type
+ >::template change_structure< TargetStructure >::_and_::
+ template change_imfr< TargetImfR >::type;
+
+ return target_vec_t( source, imf_r, imf_c );
+ }
+ } // namespace internal
+
+ /**
+ * @brief Version of get_view over vectors where a range of elements are selected to form a new view.
+ * The function guarantees the created view is non-overlapping with other existing views only when the
+ * check can be performed in constant time.
+ *
+ * @tparam SourceVector The type of the source ALP vector
+ *
+ * @param[in] source The ALP Vector object over which the view is created.
+ * @param[in] rng A valid range of elements
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This function performs
+ * \f$ \Theta(nref) \f$ amount of work where \f$ nref \f$ is the number
+ * of available views of \a source.
+ * -# A call to this function may use \f$ \mathcal{O}(1) \f$ bytes
+ * of memory beyond the memory in use at the function call entry.
+ * -# This function may make system calls.
+ * \endparblock
+ *
+ */
+ template<
+ typename SourceVector,
+ std::enable_if_t< is_vector< SourceVector >::value > * = nullptr
+ >
+ typename SourceVector::template view_type< view::gather >::type
+ get_view( SourceVector &source, const utils::range& rng ) {
+
+ return internal::get_view< typename SourceVector::structure >(
+ source,
+ std::move( imf::Strided( rng.count(), nrows(source), rng.start, rng.stride ) ),
+ std::move( imf::Id( 1 ) )
+ );
+ }
+
+ /**
+ *
+ * Generate a dynamic gather view where the type is compliant with the source Vector.
+ * Version where a selection of indices, expressed as a vector of indices,
+ * form a new view with specified target structure.
+ *
+ * @tparam TargetStructure The target structure of the new view. It should verify
+ * alp::is_in .
+ * @tparam SourceVector The type of the source ALP vector
+ * @tparam SelectVector The type of the ALP vector defining permutation for rows
+ *
+ * @param source The source ALP matrix
+ * @param sel A valid permutation vector of a subset of indices
+ *
+ * @return A new gather view over the source ALP matrix.
+ *
+ */
+ template<
+ typename TargetStructure,
+ typename SourceVector,
+ typename SelectVector,
+ std::enable_if_t<
+ is_vector< SourceVector >::value &&
+ is_vector< SelectVector >::value
+ > * = nullptr
+ >
+ typename internal::new_container_type_from<
+ typename SourceVector::template view_type< view::gather >::type
+ >::template change_structure< TargetStructure >::_and_::
+ template change_imfr< imf::Select >::type
+ get_view(
+ SourceVector &source,
+ const SelectVector &sel
+ ) {
+ return internal::get_view< TargetStructure >(
+ source,
+ imf::Select( size( source ), sel ),
+ imf::Id( 1 )
+ );
+ }
+
+} // end namespace ``alp''
+
+#endif // end ``_H_ALP_AMF_BASED_VECTOR''
+
diff --git a/include/alp/backends.hpp b/include/alp/backends.hpp
new file mode 100644
index 000000000..1e4d6ec42
--- /dev/null
+++ b/include/alp/backends.hpp
@@ -0,0 +1,60 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @author: A. N. Yzelman
+ * @date 21st of December, 2016
+ *
+ * @file This file contains a register of all backends that are either
+ * implemented, under implementation, or were at any point in time
+ * conceived and noteworthy enough to be recorded for future
+ * consideration to implement. It does so via the alp::Backend
+ * enum.
+ */
+
+#ifndef _H_ALP_BACKENDS
+#define _H_ALP_BACKENDS
+
+namespace alp {
+
+ /**
+ * This enum collects all implemented backends. Depending on compile flags,
+ * some of these options may be disabled.
+ */
+ enum Backend {
+
+ /**
+ * The ALP reference backend.
+ */
+ reference,
+
+ /*
+ * The ALP dispatch backend.
+ */
+ dispatch,
+
+ /**
+ * The ALP OpenMP backend.
+ */
+ omp,
+
+ };
+
+} // namespace alp
+
+#endif
+
diff --git a/include/alp/base/blas0.hpp b/include/alp/base/blas0.hpp
new file mode 100644
index 000000000..460eaa347
--- /dev/null
+++ b/include/alp/base/blas0.hpp
@@ -0,0 +1,457 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 5th of December 2016
+ */
+
+#ifndef _H_ALP_BLAS0_BASE
+#define _H_ALP_BLAS0_BASE
+
+#include <type_traits> //enable_if
+
+#include
+#include
+
+#include "config.hpp"
+#include "scalar.hpp"
+
+namespace alp {
+
+ /**
+ * \defgroup BLAS0 The Level-0 Basic Linear Algebra Subroutines (BLAS)
+ *
+ * A collection of functions that let GraphBLAS operators work on
+ * zero-dimensional containers, i.e., on scalars.
+ *
+ * The GraphBLAS uses opaque data types and defines several standard functions
+ * to operate on these data types. Examples types are alp::Vector and
+ * alp::Matrix, example functions are alp::dot and alp::vxm.
+ *
+ * To input data into an opaque GraphBLAS type, each opaque type defines a
+ * member function \a build: alp::Vector::build() and alp::Matrix::build().
+ *
+ * To extract data from opaque GraphBLAS types, each opaque type provides
+ * \em iterators that may be obtained via the STL standard \a begin and \a end
+ * functions:
+ * - alp::Vector::begin or alp::Vector::cbegin
+ * - alp::Vector::end or alp::Vector::cend
+ * - alp::Matrix::begin or alp::Matrix::cbegin
+ * - alp::Matrix::end or alp::Matrix::cend
+ *
+ * Some GraphBLAS functions, however, reduce all elements in a GraphBLAS
+ * container into a single element of a given type. So for instance, alp::dot
+ * on two vectors of type alp::Vector using the regular real semiring
+ * alp::Semiring will store its output in a variable of type \a double.
+ *
+ * When parametrising GraphBLAS functions in terms of arbitrary Semirings,
+ * Monoids, Operators, and object types, it is useful to have a way to apply
+ * the same operators on whatever type they make functions like alp::dot
+ * produce-- that is, we require functions that enable the application of
+ * GraphBLAS operators on single elements.
+ *
+ * This group of BLAS level 0 functions provides this functionality.
+ *
+ * @{
+ */
+
+ /**
+ * Out-of-place application of the operator \a OP on two data elements.
+ *
+ * The output data will be output to an existing memory location, overwriting
+ * any existing data.
+ *
+ * @tparam descr The descriptor passed to this operator.
+	 * @tparam OP          The type of the operator to apply.
+ * @tparam InputType1 The left-hand side input argument type.
+ * @tparam InputType2 The right-hand side input argument type.
+ * @tparam OutputType The output argument type.
+ *
+ * \parblock
+ * \par Valid descriptors
+ * -# alp::descriptors::no_operation for default behaviour.
+ * -# alp::descriptors::no_casting when a call to this function should *not*
+ * automatically cast input arguments to operator input domain, and *not*
+ * automatically cast operator output to the output argument domain.
+ * \endparblock
+ *
+ * If \a InputType1 does not match the left-hand side input domain of \a OP,
+ * or if \a InputType2 does not match the right-hand side input domain of
+ * \a OP, or if \a OutputType does not match the output domain of \a OP while
+ * alp::descriptors::no_casting was set, then the code shall not compile.
+ *
+ * @param[in] x The left-hand side input data.
+ * @param[in] y The right-hand side input data.
+ * @param[out] out Where to store the result of the operator.
+ * @param[in] op The operator to apply (optional).
+ *
+ * \note \a op is optional when the operator type \a OP is explicitly given.
+ * Thus there are two ways of calling this function:
+ * -#
+ * Scalar< double > a, b, c;
+ * alp::apply< alp::operators::add >( a, b, c );
+ * , or
+ * -#
+ * Scalar< double > a, b, c;
+ * alp::operators::add< double > addition_over_doubles;
+ * alp::apply( a, b, c, addition_over_doubles);
+ *
+ *
+ * \note There should be no performance difference between the two ways of
+ * calling this function. For compatibility with other ALP
+ * implementations, the latter type of call is preferred.
+ *
+ * @return alp::SUCCESS A call to this function never fails.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This call comprises \f$ \Theta(1) \f$ work. The constant factor
+ * depends on the cost of evaluating the operator.
+ * -# This call takes \f$ \mathcal{O}(1) \f$ memory beyond the memory
+ * already used by the application when a call to this function is
+ * made.
+ * -# This call incurs at most \f$ \Theta(1) \f$ memory where the
+ * constant factor depends on the storage requirements of the
+ * arguments and the temporary storage required for evaluation of
+ * this operator.
+ * \endparblock
+ *
+	 * \warning The use of stateful operators, or even the use of stateless
+ * operators that are not included in alp::operators, may cause this
+ * function to incur performance penalties beyond the worst case
+ * sketched above.
+ *
+ * @see foldr for applying an operator in-place (if allowed).
+ * @see foldl for applying an operator in-place (if allowed).
+ * @see alp::operators::internal::Operator for a discussion on when foldr and
+ * foldl successfully generate in-place code.
+ */
+ template<
+ class OP,
+ typename InputType1, typename InputStructure1,
+ typename InputType2, typename InputStructure2,
+ typename OutputType, typename OutputStructure,
+ enum Backend implementation = config::default_backend
+ >
+ RC apply(
+ Scalar< OutputType, OutputStructure, implementation > &out,
+ const Scalar< InputType1, InputStructure1, implementation > &x,
+ const Scalar< InputType2, InputStructure2, implementation > &y,
+ const OP &op = OP(),
+ const std::enable_if_t<
+ alp::is_operator< OP >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ !alp::is_object< OutputType >::value
+ > * = nullptr
+ ) {
+#ifdef _DEBUG
+ std::cerr << "Selected backend does not implement alp::apply (scalar)\n";
+#endif
+#ifndef NDEBUG
+ const bool backend_does_not_support_scalar_apply = false;
+ assert( backend_does_not_support_scalar_apply );
+#endif
+
+ (void) out;
+ (void) x;
+ (void) y;
+ (void) op;
+
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Application of the operator \a OP on two data elements. The output data
+ * will overwrite the right-hand side input element.
+ *
+ * In mathematical notation, this function calculates \f$ x \odot y \f$ and
+ * copies the result into \a y.
+ *
+ * @tparam OP The type of the operator to apply.
+ * @tparam InputType The type of the left-hand side input element. This
+ * element will be accessed read-only.
+ * @tparam IOType The type of the right-hand side input element, which will
+ * be overwritten.
+ *
+ * \parblock
+ * \par Valid descriptors
+ * -# alp::descriptors::no_operation for default behaviour.
+ * -# alp::descriptors::no_casting when a call to this function should *not*
+ * automatically cast input arguments to operator input domain, and *not*
+ * automatically cast operator output to the output argument domain.
+ * \endparblock
+ *
+ * If \a InputType does not match the left-hand side input domain
+ * (see alp::operators::internal::Operator::D1) corresponding to \a OP, then
+ * \a x will be temporarily cached and cast into \a D1.
+ * If \a IOType does not match the right-hand side input domain corresponding
+ * to \a OP, then \a y will be temporarily cached and cast into \a D2.
+ * If \a IOType does not match the output domain corresponding to \a OP, then
+ * the result of \f$ x \odot y \f$ will be temporarily cached before cast to
+ * \a IOType and written to \a y.
+ *
+ * @param[in] x The left-hand side input parameter.
+ * @param[in,out] y On function entry: the right-hand side input parameter.
+ * On function exit: the output of the operator.
+ * @param[in] op The operator to apply (optional).
+ *
+ * @return alp::SUCCESS A call to this function never fails.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This call comprises \f$ \Theta(1) \f$ work. The constant factor
+ * depends on the cost of evaluating the operator.
+ * -# This call will not allocate any new dynamic memory.
+ * -# This call requires at most \f$ \mathit{sizeof}(D_1+D_2+D_3) \f$
+ * bytes of temporary storage, plus any temporary requirements for
+ * evaluating \a op.
+ * -# This call incurs at most \f$ \mathit{sizeof}(D_1+D_2+D_3) +
+ * \mathit{sizeof}(\mathit{InputType}+2\mathit{IOType}) \f$ bytes of
+ * data movement, plus any data movement requirements for evaluating
+ * \a op.
+ * \endparblock
+ *
+	 * \warning The use of stateful operators, or even the use of stateless
+ * operators that are not included in alp::operators, may cause this
+ * function to incur performance penalties beyond the worst case
+ * sketched above.
+ *
+ * \note For the standard stateless operators in alp::operators, there are
+ * no additional temporary storage requirements nor any additional data
+ * movement requirements than the ones mentioned above.
+ *
+ * \note If \a OP is fold-right capable, the temporary storage and data
+ * movement requirements are less than reported above.
+ *
+ * @see foldl for a left-hand in-place version.
+ * @see apply for an example of how to call this function without explicitly
+ * passing \a op.
+	 * @see alp::operators::internal::Operator for a discussion on fold-right
+ * capable operators and on stateful operators.
+ */
+ template<
+ class OP,
+ typename InputType, typename InputStructure,
+ typename IOType, typename IOStructure,
+ enum Backend implementation = config::default_backend
+ >
+ RC foldr(
+ const Scalar< InputType, InputStructure, implementation > &x,
+ Scalar< IOType, IOStructure, implementation > &y,
+ const OP & op = OP(),
+ const std::enable_if_t<
+ alp::is_operator< OP >::value &&
+ ! alp::is_object< InputType >::value &&
+ ! alp::is_object< IOType >::value
+ > * = nullptr
+ ) {
+
+#ifdef _DEBUG
+ std::cerr << "Selected backend does not implement alp::foldr (scalar)\n";
+#endif
+#ifndef NDEBUG
+ const bool backend_does_not_support_scalar_foldr = false;
+ assert( backend_does_not_support_scalar_foldr );
+#endif
+
+ (void) x;
+ (void) y;
+ (void) op;
+
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Application of the operator \a OP on two data elements. The output data
+ * will overwrite the left-hand side input element.
+ *
+ * In mathematical notation, this function calculates \f$ x \odot y \f$ and
+ * copies the result into \a x.
+ *
+ * @tparam OP The type of the operator to apply.
+ * @tparam IOType The type of the left-hand side input element, which will
+ * be overwritten.
+ * @tparam InputType The type of the right-hand side input element. This
+ * element will be accessed read-only.
+ *
+ * \parblock
+ * \par Valid descriptors
+ * -# alp::descriptors::no_operation for default behaviour.
+ * -# alp::descriptors::no_casting when a call to this function should *not*
+ * automatically cast input arguments to operator input domain, and *not*
+ * automatically cast operator output to the output argument domain.
+ * \endparblock
+ *
+ * If \a InputType does not match the right-hand side input domain
+ * (see alp::operators::internal::Operator::D2) corresponding to \a OP, then
+ * \a x will be temporarily cached and cast into \a D2.
+ * If \a IOType does not match the left-hand side input domain corresponding
+ * to \a OP, then \a y will be temporarily cached and cast into \a D1.
+ * If \a IOType does not match the output domain corresponding to \a OP, then
+ * the result of \f$ x \odot y \f$ will be temporarily cached before cast to
+ * \a IOType and written to \a y.
+ *
+ * @param[in,out] x On function entry: the left-hand side input parameter.
+ * On function exit: the output of the operator.
+ * @param[in] y The right-hand side input parameter.
+ * @param[in] op The operator to apply (optional).
+ *
+ * @return alp::SUCCESS A call to this function never fails.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This call comprises \f$ \Theta(1) \f$ work. The constant factor
+ * depends on the cost of evaluating the operator.
+ * -# This call will not allocate any new dynamic memory.
+ * -# This call requires at most \f$ \mathit{sizeof}(D_1+D_2+D_3) \f$
+ * bytes of temporary storage, plus any temporary requirements for
+ * evaluating \a op.
+ * -# This call incurs at most \f$ \mathit{sizeof}(D_1+D_2+D_3) +
+ * \mathit{sizeof}(\mathit{InputType}+2\mathit{IOType}) \f$ bytes of
+ * data movement, plus any data movement requirements for evaluating
+ * \a op.
+ * \endparblock
+ *
+	 * \warning The use of stateful operators, or even the use of stateless
+ * operators that are not included in alp::operators, may cause this
+ * function to incur performance penalties beyond the worst case
+ * sketched above.
+ *
+ * \note For the standard stateless operators in alp::operators, there are
+ * no additional temporary storage requirements nor any additional data
+ * movement requirements than the ones mentioned above.
+ *
+ * \note If \a OP is fold-left capable, the temporary storage and data
+ * movement requirements are less than reported above.
+ *
+ * @see foldr for a right-hand in-place version.
+ * @see apply for an example of how to call this function without explicitly
+ * passing \a op.
+	 * @see alp::operators::internal::Operator for a discussion on fold-right
+ * capable operators and on stateful operators.
+ */
+ template<
+ class OP,
+ typename InputType, typename InputStructure,
+ typename IOType, typename IOStructure,
+ enum Backend implementation = config::default_backend
+ >
+ RC foldl(
+ Scalar< IOType, IOStructure, implementation > &x,
+ const Scalar< InputType, InputStructure, implementation > &y,
+ const OP & op = OP(),
+ const std::enable_if_t<
+ alp::is_operator< OP >::value &&
+ ! alp::is_object< InputType >::value &&
+ ! alp::is_object< IOType >::value
+ > * = nullptr
+ ) {
+
+#ifdef _DEBUG
+ std::cerr << "Selected backend does not implement alp::foldl (scalar)\n";
+#endif
+#ifndef NDEBUG
+ const bool backend_does_not_support_scalar_foldl = false;
+ assert( backend_does_not_support_scalar_foldl );
+#endif
+
+ (void) x;
+ (void) y;
+ (void) op;
+
+ return UNSUPPORTED;
+ }
+
+ /** @} */
+
+ namespace internal {
+
+ /**
+ * Helper class that, depending on a given descriptor, either returns a
+ * nonzero value from a vector, or its corresponding coordinate.
+ *
+ * This class hence makes the use of the following descriptor(s) transparent:
+ * -# #alp::descriptors::use_index
+ *
+ * @tparam descr The descriptor under which to write back either the value or
+ * the index.
+ * @tparam OutputType The type of the output to return.
+ * @tparam D The type of the input.
+ * @tparam Enabled Controls, through SFINAE, whether the use of the
+ * #use_index descriptor is allowed at all.
+ */
+ template< alp::Descriptor descr, typename OutputType, typename D, typename Enabled = void >
+ class ValueOrIndex;
+
+ /* Version where use_index is allowed. */
+ template< alp::Descriptor descr, typename OutputType, typename D >
+ class ValueOrIndex<
+ descr, OutputType, D,
+ typename std::enable_if< std::is_arithmetic< OutputType >::value
+ && ! std::is_same< D, void >::value >::type
+ > {
+ private:
+ static constexpr const bool use_index = descr & alp::descriptors::use_index;
+ static_assert(
+ use_index
+ || std::is_convertible< D, OutputType >::value, "Cannot convert to the requested output type"
+ );
+
+ public:
+
+ static OutputType getFromScalar( const D &x, const size_t index ) noexcept {
+ if( use_index ) {
+ return static_cast< OutputType >( index );
+ } else {
+ return static_cast< OutputType >( x );
+ }
+ }
+
+ };
+
+ /* Version where use_index is not allowed. */
+ template< alp::Descriptor descr, typename OutputType, typename D >
+ class ValueOrIndex<
+ descr, OutputType, D,
+ typename std::enable_if< ! std::is_arithmetic< OutputType >::value
+ && ! std::is_same< OutputType, void >::value >::type
+ > {
+ static_assert(
+ !( descr & descriptors::use_index ),
+ "use_index descriptor given while output type is not numeric"
+ );
+ static_assert(
+ std::is_convertible< D, OutputType >::value,
+ "Cannot convert input to the given output type"
+ );
+
+ public:
+
+ static OutputType getFromScalar( const D &x, const size_t ) noexcept {
+ return static_cast< OutputType >( x );
+ }
+ };
+
+ } // namespace internal
+
+} // namespace alp
+
+#undef NO_CAST_ASSERT
+
+#endif // end ``_H_ALP_BLAS0_BASE''
diff --git a/include/alp/base/blas1.hpp b/include/alp/base/blas1.hpp
new file mode 100644
index 000000000..9ed0711d3
--- /dev/null
+++ b/include/alp/base/blas1.hpp
@@ -0,0 +1,877 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 5th of December 2016
+ */
+
+#ifndef _H_ALP_BASE_BLAS1
+#define _H_ALP_BASE_BLAS1
+
+#include // use from grb
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace alp {
+
+ /**
+ * \defgroup BLAS1 The Level-1 ALP/GraphBLAS routines
+ * @{
+ */
+
+ /**
+  * Folds all elements of an ALP vector \a x into a single Scalar \a beta,
+  * using the given \a monoid as the reduction operation.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  * NOTE(review): presumably shadowed by backend-specific overloads.
+  */
+ template< Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ typename IOType, typename IOStructure,
+ class Monoid,
+ Backend backend
+ >
+ RC foldr(
+ const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+ Scalar< IOType, IOStructure, backend > &beta,
+ const Monoid &monoid = Monoid(),
+ const std::enable_if_t<
+ !alp::is_object< InputType >::value && !alp::is_object< IOType >::value && alp::is_monoid< Monoid >::value
+ > * const = nullptr
+ ) {
+ (void) x;
+ (void) beta;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * C++ scalar variant of the reduce-to-scalar foldr: the result lands in a
+  * raw \a IOType instead of an ALP Scalar.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template< Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ typename IOType,
+ class Monoid,
+ Backend backend
+ >
+ RC foldr(
+ const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+ IOType &beta,
+ const Monoid &monoid = Monoid(),
+ const std::enable_if_t<
+ !alp::is_object< InputType >::value && !alp::is_object< IOType >::value && alp::is_monoid< Monoid >::value
+ > * const = nullptr
+ ) {
+ (void) x;
+ (void) beta;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * For all elements in an ALP vector \a y, folds the Scalar value
+  * \f$ \alpha \f$ into each element, using the given \a monoid.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template< Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ class Monoid,
+ Backend backend
+ >
+ RC foldr(
+ const Scalar< InputType, InputStructure, backend > &alpha,
+ Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &y,
+ const Monoid & monoid = Monoid(),
+ const std::enable_if_t<
+ !alp::is_object< InputType >::value && !alp::is_object< IOType >::value && alp::is_monoid< Monoid >::value
+ > * const = nullptr
+ ) {
+ (void) alpha;
+ (void) y;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Computes y = x + y, operator variant.
+  *
+  * Specialisation for scalar \a x (given as an ALP Scalar \a alpha).
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template< Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ class OP, Backend backend
+ >
+ RC foldr(
+ const Scalar< InputType, InputStructure, backend > &alpha,
+ Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &y,
+ const OP & op = OP(),
+ const std::enable_if_t<
+ !alp::is_object< InputType >::value && ! alp::is_object< IOType >::value && alp::is_operator< OP >::value
+ > * const = nullptr
+ ) {
+ (void) alpha;
+ (void) y;
+ (void) op;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Folds all elements of an ALP vector \a x into the corresponding elements
+  * of the input/output vector \a y. Operator version.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template< Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ class OP, Backend backend
+ >
+ RC foldr(
+ const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+ Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &y,
+ const OP & op = OP(),
+ const std::enable_if_t<
+ alp::is_operator< OP >::value && ! alp::is_object< InputType >::value && ! alp::is_object< IOType >::value
+ > * = nullptr
+ ) {
+ (void) x;
+ (void) y;
+ (void) op;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Folds all elements of an ALP vector \a x into the corresponding elements
+  * of the input/output vector \a y. Monoid version.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template< Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ class Monoid,
+ Backend backend
+ >
+ RC foldr(
+ const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+ Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &y,
+ const Monoid & monoid = Monoid(),
+ const std::enable_if_t<
+ alp::is_monoid< Monoid >::value && ! alp::is_object< InputType >::value && ! alp::is_object< IOType >::value
+ > * = nullptr
+ ) {
+ (void) x;
+ (void) y;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * For all elements in an ALP vector \a x, folds the Scalar value
+  * \f$ \beta \f$ into each element, using the given operator \a op.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ typename InputType, typename InputStructure,
+ class Op,
+ Backend backend
+ >
+ RC foldl(
+ Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &x,
+ // Pass by const reference, consistent with all sibling overloads taking
+ // Scalar arguments (was pass-by-value; const& binds to the same callers).
+ const Scalar< InputType, InputStructure, backend > &beta,
+ const Op &op = Op(),
+ const std::enable_if_t<
+ ! alp::is_object< IOType >::value && ! alp::is_object< InputType >::value && alp::is_operator< Op >::value
+ > * = nullptr
+ ) {
+ (void) x;
+ (void) beta;
+ (void) op;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Folds all elements of an ALP vector \a y into the corresponding elements
+  * of the input/output vector \a x. Operator version.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ class OP,
+ Backend backend
+ >
+ RC foldl(
+ Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &x,
+ const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
+ const OP &op = OP(),
+ const std::enable_if_t<
+ alp::is_operator< OP >::value && !alp::is_object< IOType >::value && !alp::is_object< InputType >::value
+ > * = nullptr
+ ) {
+ (void) x;
+ (void) y;
+ (void) op;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Folds all elements of an ALP vector \a y into the corresponding elements
+  * of the input/output vector \a x. Monoid version.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ class Monoid,
+ Backend backend
+ >
+ RC foldl(
+ Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &x,
+ const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
+ const Monoid &monoid = Monoid(),
+ const std::enable_if_t<
+ alp::is_monoid< Monoid >::value && ! alp::is_object< IOType >::value && ! alp::is_object< InputType >::value
+ > * = nullptr
+ ) {
+ (void) x;
+ (void) y;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Calculates the element-wise operation between one vector and one scalar,
+  * \f$ z = x .* \beta \f$, using the given operator. Out of place.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR, typename InputImfC,
+ typename InputType2, typename InputStructure2,
+ class OP,
+ Backend backend
+ >
+ RC eWiseApply(
+ Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR, InputImfC, backend > &x,
+ const Scalar< InputType2, InputStructure2, backend > &beta,
+ const OP &op = OP(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_operator< OP >::value
+ > * const = nullptr
+ ) {
+ (void) z;
+ (void) x;
+ (void) beta;
+ (void) op;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Computes \f$ z = x \odot y \f$, out of place.
+  *
+  * Specialisation for both \a x and \a y scalar, operator version.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1,
+ typename InputType2, typename InputStructure2,
+ class OP,
+ Backend backend
+ >
+ RC eWiseApply(
+ Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+ const Scalar< InputType1, InputStructure1, backend> &alpha,
+ const Scalar< InputType2, InputStructure2, backend> &beta,
+ const OP &op = OP(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_operator< OP >::value
+ > * const = nullptr
+ ) {
+ (void) z;
+ (void) alpha;
+ (void) beta;
+ (void) op;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Computes \f$ z = x \odot y \f$, out of place.
+  *
+  * Specialisation for both \a x and \a y scalar, monoid version.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1,
+ typename InputType2, typename InputStructure2,
+ class Monoid,
+ Backend backend
+ >
+ RC eWiseApply(
+ Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+ const Scalar< InputType1, InputStructure1, backend> &alpha,
+ const Scalar< InputType2, InputStructure2, backend> &beta,
+ const Monoid &monoid = Monoid(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_monoid< Monoid >::value
+ > * const = nullptr
+ ) {
+ (void) z;
+ (void) alpha;
+ (void) beta;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Computes \f$ z = x \odot y \f$, out of place.
+  *
+  * Vector-vector monoid version.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ class Monoid,
+ Backend backend
+ >
+ RC eWiseApply(
+ Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+ const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+ const Monoid &monoid = Monoid(),
+ const std::enable_if_t<
+ ! alp::is_object< OutputType >::value &&
+ ! alp::is_object< InputType1 >::value &&
+ ! alp::is_object< InputType2 >::value &&
+ alp::is_monoid< Monoid >::value
+ > * const = nullptr
+ ) {
+ (void) z;
+ (void) x;
+ (void) y;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Computes \f$ z = x \odot y \f$, out of place.
+  *
+  * Specialisation for scalar \a x. Monoid version.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ class Monoid,
+ Backend backend
+ >
+ RC eWiseApply(
+ Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+ const Scalar< InputType1, InputStructure1, backend> &alpha,
+ const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+ const Monoid &monoid = Monoid(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_monoid< Monoid >::value
+ > * const = nullptr
+ ) {
+ (void) z;
+ (void) alpha;
+ (void) y;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Computes \f$ z = x \odot y \f$, out of place.
+  *
+  * Specialisation for scalar \a y. Monoid version.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2,
+ class Monoid,
+ Backend backend
+ >
+ RC eWiseApply(
+ Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+ const Scalar< InputType2, InputStructure2, backend > &beta,
+ const Monoid &monoid = Monoid(),
+ // BUGFIX: was `typename std::enable_if< ... > *` (missing ::type), which
+ // is a pointer to the enable_if struct itself and therefore always valid:
+ // the SFINAE guard never disabled this overload. Use enable_if_t like all
+ // sibling overloads so the condition actually participates in SFINAE.
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_monoid< Monoid >::value
+ > * const = nullptr
+ ) {
+ (void) z;
+ (void) x;
+ (void) beta;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Calculates the element-wise operation between one scalar and one vector,
+  * \f$ z = \alpha .* y \f$, using the given operator. Out of place.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ class OP,
+ Backend backend
+ >
+ RC eWiseApply(
+ Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+ const Scalar< InputType1, InputStructure1, backend > &alpha,
+ const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+ const OP &op = OP(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_operator< OP >::value
+ > * const = nullptr
+ ) {
+ (void) z;
+ (void) alpha;
+ (void) y;
+ (void) op;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Calculates the element-wise operation on elements of two vectors,
+  * \f$ z = x .* y \f$, using the given operator. Out of place.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ class OP,
+ Backend backend
+ >
+ RC eWiseApply(
+ Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+ const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+ const OP &op = OP(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_operator< OP >::value
+ > * const = nullptr
+ ) {
+ (void) z;
+ (void) x;
+ (void) y;
+ (void) op;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Calculates the element-wise multiplication of two vectors,
+  * \f$ z = z + x .* y \f$, under a given semiring. In place on \a z.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ class Ring,
+ Backend backend
+ >
+ RC eWiseMul(
+ Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+ const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+ const Ring &ring = Ring(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_semiring< Ring >::value
+ > * const = nullptr
+ ) {
+ (void) z;
+ (void) x;
+ (void) y;
+ (void) ring;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Computes \f$ z = z + x * y \f$ under a given semiring.
+  *
+  * Specialisation for scalar \a x (given as an ALP Scalar \a alpha).
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ class Ring,
+ Backend backend
+ >
+ RC eWiseMul(
+ Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+ const Scalar< InputType1, InputStructure1, backend > &alpha,
+ const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+ const Ring &ring = Ring(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_semiring< Ring >::value
+ > * const = nullptr
+ ) {
+ (void) z;
+ (void) alpha;
+ (void) y;
+ (void) ring;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Computes \f$ z = z + x * y \f$ under a given semiring.
+  *
+  * Specialisation for scalar \a y (given as an ALP Scalar \a beta).
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2,
+ class Ring,
+ Backend backend
+ >
+ RC eWiseMul(
+ Vector< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &z,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+ const Scalar< InputType2, InputStructure2, backend > &beta,
+ const Ring &ring = Ring(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_semiring< Ring >::value
+ > * const = nullptr
+ ) {
+ (void) z;
+ (void) x;
+ (void) beta;
+ (void) ring;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Calculates the dot product, \f$ \alpha = (x,y) \f$, under a given additive
+  * monoid and multiplicative operator. The result is written to the ALP
+  * Scalar \a z.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ class AddMonoid, class AnyOp,
+ Backend backend
+ >
+ RC dot(
+ Scalar< OutputType, OutputStructure, backend > &z,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+ const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+ const AddMonoid &addMonoid = AddMonoid(),
+ const AnyOp &anyOp = AnyOp(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_monoid< AddMonoid >::value &&
+ alp::is_operator< AnyOp >::value
+ > * const = nullptr
+ ) {
+ // BUGFIX: z was not voided, yielding an unused-parameter warning; all
+ // sibling stubs suppress every parameter.
+ (void) z;
+ (void) x;
+ (void) y;
+ (void) addMonoid;
+ (void) anyOp;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * C++ scalar specialization of the dot product under an additive monoid and
+  * a multiplicative operator: the result lands in a raw \a OutputType.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ class AddMonoid, class AnyOp,
+ Backend backend
+ >
+ RC dot(
+ OutputType &z,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+ const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+ const AddMonoid &addMonoid = AddMonoid(),
+ const AnyOp &anyOp = AnyOp(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_monoid< AddMonoid >::value &&
+ alp::is_operator< AnyOp >::value
+ > * const = nullptr
+ ) {
+ // BUGFIX: this stub suppressed none of its parameters (the only overload
+ // in the file that did not), yielding unused-parameter warnings.
+ (void) z;
+ (void) x;
+ (void) y;
+ (void) addMonoid;
+ (void) anyOp;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Provides a generic implementation of the dot computation on semirings by
+  * translating it into a dot computation on an additive commutative monoid
+  * with any multiplicative operator. The result is written to the ALP
+  * Scalar \a x.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename IOType, typename IOStructure,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ class Ring,
+ Backend backend
+ >
+ RC dot(
+ Scalar< IOType, IOStructure, backend > &x,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &left,
+ const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &right,
+ const Ring &ring = Ring(),
+ const std::enable_if_t<
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ !alp::is_object< IOType >::value &&
+ alp::is_semiring< Ring >::value
+ > * const = nullptr
+ ) {
+ (void) x;
+ (void) left;
+ (void) right;
+ (void) ring;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * C++ scalar specialization of the semiring dot product: the result lands
+  * in a raw \a IOType.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation, class Ring,
+ typename IOType,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ Backend backend
+ >
+ RC dot(
+ IOType &x,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &left,
+ const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &right,
+ const Ring &ring = Ring(),
+ const std::enable_if_t<
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ !alp::is_object< IOType >::value &&
+ alp::is_semiring< Ring >::value
+ > * const = nullptr
+ ) {
+ (void) x;
+ (void) left;
+ (void) right;
+ (void) ring;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * This is the eWiseLambda that performs length checking by recursion.
+  *
+  * In the backend implementation all vectors are distributed equally, so no
+  * need to synchronise any data structures. We do need to do error checking
+  * though, to see when to return alp::MISMATCH. That's this function.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  *
+  * @see Vector::operator[]()
+  * @see Vector::lambda_backend
+  */
+ template<
+ typename Func,
+ typename DataType1, typename DataStructure1, typename DataView1, typename InputImfR1, typename InputImfC1,
+ typename DataType2, typename DataStructure2, typename DataView2, typename InputImfR2, typename InputImfC2,
+ Backend backend,
+ typename... Args
+ >
+ RC eWiseLambda(
+ const Func f,
+ Vector< DataType1, DataStructure1, Density::Dense, DataView1, InputImfR1, InputImfC1, backend > &x,
+ const Vector< DataType2, DataStructure2, Density::Dense, DataView2, InputImfR2, InputImfC2, backend > &y,
+ Args const &... args
+ ) {
+ (void) f;
+ (void) x;
+ (void) y;
+ // BUGFIX: the pack was left unused behind a commented-out `// (void) args;`,
+ // yielding unused-parameter warnings. Suppress each pack element via a
+ // C++11-compatible pack expansion.
+ const int suppress[] = { 0, ( (void) args, 0 )... };
+ (void) suppress;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * No implementation notes. This is the `real' implementation on backend
+  * vectors: applies functor \a f element-wise over \a x.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  *
+  * @see Vector::operator[]()
+  * @see Vector::lambda_backend
+  */
+ template<
+ typename Func,
+ typename DataType, typename DataStructure, typename DataView, typename DataImfR, typename DataImfC,
+ Backend backend
+ >
+ RC eWiseLambda(
+ const Func f,
+ Vector< DataType, DataStructure, Density::Dense, DataView, DataImfR, DataImfC, backend > &x
+ ) {
+ (void) f;
+ (void) x;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Reduces a vector into a scalar. Reduction takes place according a monoid
+  * \f$ (\oplus,1) \f$, where \f$ \oplus:\ D_1 \times D_2 \to D_3 \f$ with an
+  * associated identity \f$ 1 \in \{D_1,D_2,D_3\} \f$. Elements from the given
+  * vector \f$ y \in \{D_1,D_2\} \f$ will be applied at the left-hand or right-
+  * hand side of \f$ \oplus \f$; which, exactly, is implementation-dependent
+  * but should not matter since \f$ \oplus \f$ should be associative.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename IOType, typename IOStructure,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ class Monoid,
+ Backend backend
+ >
+ RC foldl(
+ Scalar< IOType, IOStructure, backend > &alpha,
+ const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
+ const Monoid &monoid = Monoid(),
+ const std::enable_if_t<
+ ! alp::is_object< IOType >::value && ! alp::is_object< InputType >::value && alp::is_monoid< Monoid >::value
+ > * const = nullptr
+ ) {
+ (void) alpha;
+ (void) y;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Sort vectors, function available to user, e.g. to sort eigenvectors.
+  * Writes the sorting permutation of \a toSort (per comparator \a cmp) into
+  * \a permutation; \a toSort itself is read-only.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ typename IndexType, typename IndexStructure, typename IndexView, typename IndexImfR, typename IndexImfC,
+ typename ValueType, typename ValueStructure, typename ValueView, typename ValueImfR, typename ValueImfC,
+ typename Compare,
+ Backend backend
+ >
+ RC sort(
+ Vector< IndexType, IndexStructure, Density::Dense, IndexView, IndexImfR, IndexImfC, backend > &permutation,
+ const Vector< ValueType, ValueStructure, Density::Dense, ValueView, ValueImfR, ValueImfC, backend > &toSort,
+ Compare cmp
+ ) noexcept {
+ (void) permutation;
+ (void) toSort;
+ (void) cmp;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * Provides a generic implementation of the 2-norm computation over \a y,
+  * writing the result into the ALP Scalar \a x. Enabled only for
+  * floating-point or complex output types.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ class Ring,
+ Backend backend
+ >
+ RC norm2(
+ Scalar< OutputType, OutputStructure, backend > &x,
+ const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
+ const Ring &ring = Ring(),
+ const std::enable_if_t<
+ std::is_floating_point< OutputType >::value || grb::utils::is_complex< OutputType >::value
+ > * const = nullptr
+ ) {
+ (void) x;
+ (void) y;
+ (void) ring;
+ return UNSUPPORTED;
+ }
+
+ /**
+  * C++ scalar version of the 2-norm computation: the result lands in a raw
+  * \a OutputType. Enabled only for floating-point or complex output types.
+  *
+  * Base variant: all arguments are ignored and UNSUPPORTED is returned.
+  */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ class Ring,
+ Backend backend
+ >
+ RC norm2(
+ OutputType &x,
+ const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &y,
+ const Ring &ring = Ring(),
+ const std::enable_if_t<
+ std::is_floating_point< OutputType >::value || grb::utils::is_complex< OutputType >::value
+ > * const = nullptr
+ ) {
+ (void) x;
+ (void) y;
+ (void) ring;
+ return UNSUPPORTED;
+ }
+
+ /** @} */
+
+} // end namespace alp
+
+#endif // end _H_ALP_BASE_BLAS1
+
diff --git a/include/alp/base/blas2.hpp b/include/alp/base/blas2.hpp
new file mode 100644
index 000000000..2334d7710
--- /dev/null
+++ b/include/alp/base/blas2.hpp
@@ -0,0 +1,473 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ *
+ * Defines the GraphBLAS level 2 API.
+ *
+ * @author A. N. Yzelman
+ * @date 30th of March 2017
+ */
+
+#ifndef _H_ALP_BLAS2_BASE
+#define _H_ALP_BLAS2_BASE
+
+#include <functional> // NOTE(review): include targets were lost in extraction; reconstructed — verify
+#include <type_traits>
+
+#include <alp/backends.hpp>
+#include <alp/config.hpp>
+#include <alp/descriptors.hpp>
+#include <alp/rc.hpp>
+
+#include "blas1.hpp"
+#include "config.hpp"
+#include "matrix.hpp"
+#include "vector.hpp"
+
+namespace alp {
+
+ /**
+ * \defgroup BLAS2 The Level-2 Basic Linear Algebra Subroutines (BLAS)
+ *
+ * A collection of functions that allow GraphBLAS operators, monoids, and
+	 * semirings to work on a mix of zero-dimensional, one-dimensional, and
+ * two-dimensional containers.
+ *
+ * That is, these functions allow various linear algebra operations on
+ * scalars, objects of type alp::Vector, and objects of type alp::Matrix.
+ *
+ * \note The backends of each opaque data type should match.
+ *
+ * @{
+ */
+
+ template<
+ Descriptor descr = descriptors::no_operation,
+ class Ring,
+ typename IOType = typename Ring::D4, typename IOStructure,
+ typename IOView, typename IOImfR, typename IOImfC,
+ typename InputType1 = typename Ring::D1, typename InputStructure1,
+ typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2 = typename Ring::D2, typename InputStructure2,
+ typename InputView2, typename InputImfR2, typename InputImfC2,
+ Backend backend
+ >
+ RC vxm(
+ Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &u,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
+ const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
+ const Ring &ring = Ring(),
+ const std::enable_if_t< alp::is_semiring< Ring >::value > * const = nullptr
+ ) {
+ (void) u;
+ (void) v;
+		(void) A; (void) ring; // silence unused-parameter warning (matches the mxv overload)
+ return UNSUPPORTED;
+ }
+
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename IOType, typename IOStructure, typename IOView,
+ typename IOImfR, typename IOImfC,
+ typename InputType1, typename InputStructure1, typename InputView1,
+ typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2, typename InputView2,
+ typename InputImfR2, typename InputImfC2,
+ class AdditiveMonoid, class MultiplicativeOperator,
+ Backend backend
+ >
+ RC vxm(
+ Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &u,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
+ const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
+ const AdditiveMonoid &add = AdditiveMonoid(),
+ const MultiplicativeOperator &mul = MultiplicativeOperator(),
+ const std::enable_if_t<
+ alp::is_monoid< AdditiveMonoid >::value &&
+ alp::is_operator< MultiplicativeOperator >::value &&
+ !alp::is_object< IOType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ !std::is_same< InputType2, void >::value
+ > * const = nullptr
+ ) {
+ (void) u;
+ (void) v;
+ (void) A;
+ (void) add;
+ (void) mul;
+ return UNSUPPORTED;
+ }
+
+ template<
+ Descriptor descr = descriptors::no_operation,
+ class Ring,
+ typename IOType = typename Ring::D4, typename IOStructure,
+ typename IOView, typename IOImfR, typename IOImfC,
+ typename InputType2 = typename Ring::D2, typename InputStructure2,
+ typename InputView2, typename InputImfR2, typename InputImfC2,
+ typename InputType1 = typename Ring::D1, typename InputStructure1,
+ typename InputView1, typename InputImfR1, typename InputImfC1,
+ Backend backend
+ >
+ RC mxv(
+ Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &u,
+ const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
+ const Ring &ring,
+ const std::enable_if_t< alp::is_semiring< Ring >::value > * const = nullptr
+ ) {
+ (void) u;
+ (void) A;
+ (void) v;
+ (void) ring;
+ return UNSUPPORTED;
+ }
+
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename IOType, typename IOStructure, typename IOView,
+ typename IOImfR, typename IOImfC,
+ typename InputType2, typename InputStructure2, typename InputView2,
+ typename InputImfR2, typename InputImfC2,
+ typename InputType1, typename InputStructure1, typename InputView1,
+ typename InputImfR1, typename InputImfC1,
+ class AdditiveMonoid, class MultiplicativeOperator,
+ Backend backend
+ >
+ RC mxv(
+ Vector< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &u,
+ const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &A,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &v,
+ const AdditiveMonoid &add = AdditiveMonoid(),
+ const MultiplicativeOperator &mul = MultiplicativeOperator(),
+ const std::enable_if_t<
+ alp::is_monoid< AdditiveMonoid >::value &&
+ alp::is_operator< MultiplicativeOperator >::value &&
+ !alp::is_object< IOType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ !std::is_same< InputType2, void >::value
+ > * const = nullptr
+ ) {
+ (void) u;
+ (void) A;
+ (void) v;
+ (void) add;
+ (void) mul;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * @see alp::eWiseLambda for the user-level specification.
+ */
+ template<
+ typename Func,
+ typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
+ Backend backend
+ >
+ RC eWiseLambda(
+ const Func f,
+ Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, backend > &A
+ ) {
+ (void) f;
+ (void) A;
+ return UNSUPPORTED;
+ }
+
+ /**
+	 * This function provides dimension checking and defers to the
+	 * matrix-only eWiseLambda variant above for the actual implementation.
+ *
+ * @see alp::eWiseLambda for the user-level specification.
+ */
+ template<
+ typename Func,
+ typename DataType1, typename Structure1, typename View1, typename ImfR1, typename ImfC1,
+ typename DataType2, typename Structure2, typename View2, typename ImfR2, typename ImfC2,
+ Backend backend,
+ typename... Args
+ >
+ RC eWiseLambda(
+ const Func f,
+ Matrix< DataType1, Structure1, Density::Dense, View1, ImfR1, ImfC1, backend > &A,
+ const Vector< DataType2, Structure2, Density::Dense, View2, ImfR2, ImfC2, backend > &x,
+ Args const &... args
+ ) {
+ // do size checking
+ if( !( size( x ) == nrows( A ) || size( x ) == ncols( A ) ) ) {
+ std::cerr << "Mismatching dimensions: given vector of size " << size( x )
+ << " has nothing to do with either matrix dimension (" << nrows( A ) << " nor " << ncols( A ) << ").\n";
+ return MISMATCH;
+ }
+
+ return eWiseLambda( f, A, args... );
+ }
+
+ /**
+	 * For all elements in an ALP Matrix \a B, fold the value \f$ \alpha \f$
+ * into each element.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ class Monoid,
+ Backend backend
+ >
+ RC foldr(
+ const Scalar< InputType, InputStructure, backend > &alpha,
+ Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &B,
+ const Monoid &monoid = Monoid(),
+ const std::enable_if_t<
+ !alp::is_object< InputType >::value &&
+ !alp::is_object< IOType >::value &&
+ alp::is_monoid< Monoid >::value
+ > * const = nullptr
+ ) {
+ (void) alpha;
+ (void) B;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /** Folds element-wise alpha into B, operator variant */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ class Operator,
+ Backend backend
+ >
+ RC foldr(
+ const Scalar< InputType, InputStructure, backend > &alpha,
+ Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &B,
+ const Operator &op = Operator(),
+ const std::enable_if_t<
+ !alp::is_object< InputType >::value &&
+ !alp::is_object< IOType >::value &&
+ alp::is_operator< Operator >::value
+ > * const = nullptr
+ ) {
+ (void) alpha;
+ (void) B;
+ (void) op;
+ return UNSUPPORTED;
+ }
+
+ /** Folds element-wise A into B, monoid variant */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ class Monoid,
+ Backend backend
+ >
+ RC foldr(
+ const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A,
+ Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &B,
+ const Monoid &monoid = Monoid(),
+ const std::enable_if_t<
+ !alp::is_object< InputType >::value &&
+ !alp::is_object< IOType >::value &&
+ alp::is_monoid< Monoid >::value
+ > * const = nullptr
+ ) {
+ (void) A;
+ (void) B;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /** Folds element-wise A into B, operator variant */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ class Operator,
+ Backend backend
+ >
+ RC foldr(
+ const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A,
+ Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &B,
+ const Operator &op = Operator(),
+ const std::enable_if_t<
+ !alp::is_object< InputType >::value &&
+ !alp::is_object< IOType >::value &&
+ alp::is_operator< Operator >::value
+ > * const = nullptr
+ ) {
+ (void) A;
+ (void) B;
+ (void) op;
+ return UNSUPPORTED;
+ }
+
+ /** Folds element-wise B into A, monoid variant */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ class Monoid,
+ Backend backend
+ >
+ RC foldl(
+ Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &A,
+ const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &B,
+ const Monoid &monoid = Monoid(),
+ const std::enable_if_t<
+ !alp::is_object< IOType >::value &&
+ !alp::is_object< InputType >::value &&
+ alp::is_monoid< Monoid >::value
+ > * const = nullptr
+ ) {
+ (void) A;
+ (void) B;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /** Folds element-wise B into A, operator variant */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ class Operator,
+ Backend backend
+ >
+ RC foldl(
+ Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &A,
+ const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &B,
+ const Operator &op = Operator(),
+ const std::enable_if_t<
+ !alp::is_object< IOType >::value &&
+ !alp::is_object< InputType >::value &&
+ alp::is_operator< Operator >::value
+ > * const = nullptr
+ ) {
+ (void) A;
+ (void) B;
+ (void) op;
+ return UNSUPPORTED;
+ }
+
+ /** Folds element-wise beta into A, monoid variant */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ class Monoid,
+ Backend backend
+ >
+ RC foldl(
+ Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &A,
+ const Scalar< InputType, InputStructure, backend > &beta,
+ const Monoid &monoid = Monoid(),
+ const std::enable_if_t<
+ !alp::is_object< IOType >::value &&
+ !alp::is_object< InputType >::value &&
+ alp::is_monoid< Monoid >::value
+ > * const = nullptr
+ ) {
+ (void) A;
+ (void) beta;
+ (void) monoid;
+ return UNSUPPORTED;
+ }
+
+ /** Folds element-wise beta into A, operator variant */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure,
+ typename IOType, typename IOStructure, typename IOView, typename IOImfR, typename IOImfC,
+ class Operator,
+ Backend backend
+ >
+ RC foldl(
+ Matrix< IOType, IOStructure, Density::Dense, IOView, IOImfR, IOImfC, backend > &A,
+ const Scalar< InputType, InputStructure, backend > &beta,
+ const Operator &op = Operator(),
+ const std::enable_if_t<
+ !alp::is_object< IOType >::value &&
+ !alp::is_object< InputType >::value &&
+ alp::is_operator< Operator >::value
+ > * const = nullptr
+ ) {
+ (void) A;
+ (void) beta;
+ (void) op;
+ return UNSUPPORTED;
+ }
+
+ /**
+	 * Returns a view over the input matrix that yields the conjugate of each accessed element.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
+ Backend backend,
+ std::enable_if_t<
+ !structures::is_a< Structure, structures::Square >::value
+ > * = nullptr
+ >
+ Matrix<
+ DataType, Structure, Density::Dense,
+ view::Functor< std::function< void( DataType &, const size_t, const size_t ) > >,
+ imf::Id, imf::Id,
+ backend
+ >
+ conjugate(
+ const Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, backend > &A,
+ const std::enable_if_t<
+ !alp::is_object< DataType >::value
+ > * const = nullptr
+ ) {
+ (void) A;
+		return UNSUPPORTED; // FIXME(review): declared return type is Matrix<...>, not RC — this cannot compile
+ }
+
+ /** Specialization for square matrices */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename DataType, typename Structure, typename View, typename ImfR, typename ImfC,
+ Backend backend,
+ std::enable_if_t<
+ structures::is_a< Structure, structures::Square >::value
+ > * = nullptr
+ >
+ Matrix<
+ DataType, Structure, Density::Dense,
+ view::Functor< std::function< void( DataType &, const size_t, const size_t ) > >,
+ imf::Id, imf::Id,
+ backend
+ >
+ conjugate(
+ const Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, backend > &A,
+ const std::enable_if_t<
+ !alp::is_object< DataType >::value
+ > * const = nullptr
+ ) {
+ (void) A;
+		return UNSUPPORTED; // FIXME(review): declared return type is Matrix<...>, not RC — this cannot compile
+ }
+ /** @} */
+
+} // namespace alp
+
+#endif // end _H_ALP_BLAS2_BASE
diff --git a/include/alp/base/blas3.hpp b/include/alp/base/blas3.hpp
new file mode 100644
index 000000000..1875ddc4e
--- /dev/null
+++ b/include/alp/base/blas3.hpp
@@ -0,0 +1,336 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ */
+
+#ifndef _H_ALP_BLAS3_BASE
+#define _H_ALP_BLAS3_BASE
+
+#include <graphblas/utils/iscomplex.hpp> // use from grb
+
+#include <alp/backends.hpp> // NOTE(review): include targets were lost in extraction; reconstructed — verify
+#include <alp/descriptors.hpp>
+#include <alp/rc.hpp>
+#include <alp/structures.hpp>
+
+#include "matrix.hpp"
+#include "vector.hpp"
+#include "io.hpp"
+
+
+namespace alp {
+
+ /**
+ * \defgroup BLAS3 The Level-3 Basic Linear Algebra Subroutines (BLAS)
+ *
+ * A collection of functions that allow GraphBLAS semirings to work on
+	 * one or more two-dimensional sparse containers (i.e., sparse matrices).
+ *
+ * @{
+ */
+
+ /**
+ * @brief Computes \f$ C = A . B \f$ for a given monoid.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ class MulMonoid,
+ Backend backend
+ >
+ RC eWiseApply(
+ Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+ const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
+ const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
+ const MulMonoid &mulmono,
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_monoid< MulMonoid >::value
+ > * const = nullptr
+ ) {
+ (void) C;
+ (void) A;
+ (void) B;
+ (void) mulmono;
+ return UNSUPPORTED;
+ }
+
+
+ /**
+ * Computes \f$ C = alpha . B \f$ for a given monoid.
+ *
+ * Case where \a A is a scalar.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ class MulMonoid,
+ Backend backend
+ >
+ RC eWiseApply(
+ Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+ const Scalar< InputType1, InputStructure1, backend > &alpha,
+ const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
+ const MulMonoid &mulmono,
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_monoid< MulMonoid >::value
+ > * const = nullptr
+ ) {
+ (void) C;
+ (void) alpha;
+ (void) B;
+ (void) mulmono;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Computes \f$ C = A . beta \f$ for a given monoid.
+ *
+ * Case where \a B is a scalar.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2,
+ class MulMonoid,
+ Backend backend
+ >
+ RC eWiseApply(
+ Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+ const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
+ const Scalar< InputType2, InputStructure2, backend > &beta,
+ const MulMonoid &mulmono,
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_monoid< MulMonoid >::value
+ > * const = nullptr
+ ) {
+ (void) C;
+ (void) A;
+ (void) beta;
+ (void) mulmono;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Calculates the element-wise multiplication of two matrices,
+ * \f$ C = C + A .* B \f$,
+ * under a given semiring.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation, class Ring,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ Backend backend
+ >
+ RC eWiseMul(
+ Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+ const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
+ const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
+ const Ring &ring = Ring(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_semiring< Ring >::value
+ > * const = nullptr
+ ) {
+ (void) C;
+ (void) A;
+ (void) B;
+ (void) ring;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * eWiseMul, version where A is a scalar.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation, class Ring,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ Backend backend
+ >
+ RC eWiseMul(
+ Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+ const Scalar< InputType1, InputStructure1, backend > &alpha,
+ const Matrix< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &B,
+ const Ring &ring = Ring(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_semiring< Ring >::value
+ > * const = nullptr
+ ) {
+ (void) C;
+ (void) alpha;
+ (void) B;
+ (void) ring;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * eWiseMul, version where B is a scalar.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation, class Ring,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2,
+ Backend backend
+ >
+ RC eWiseMul(
+ Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+ const Matrix< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &A,
+ const Scalar< InputType2, InputStructure2, backend > &beta,
+ const Ring &ring = Ring(),
+ const std::enable_if_t<
+ !alp::is_object< OutputType >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ alp::is_semiring< Ring >::value
+ > * const = nullptr
+ ) {
+ (void) C;
+ (void) A;
+ (void) beta;
+ (void) ring;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * @brief Outer product of two vectors. The result matrix \a A will contain \f$ uv^T \f$.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView, typename OutputImfR, typename OutputImfC,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ class Operator,
+ Backend backend
+ >
+ RC outer(
+ Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &A,
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &u,
+ const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &v,
+ const Operator &mul = Operator(),
+ const std::enable_if_t<
+ alp::is_operator< Operator >::value &&
+ !alp::is_object< InputType1 >::value &&
+ !alp::is_object< InputType2 >::value &&
+ !alp::is_object< OutputType >::value
+ > * const = nullptr
+ ) {
+ (void) A;
+ (void) u;
+ (void) v;
+ (void) mul;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Returns a view over the general rank-1 matrix computed with the outer product.
+ * This avoids creating the resulting container. The elements are calculated lazily on access.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType1, typename InputStructure1, typename InputView1, typename InputImfR1, typename InputImfC1,
+ typename InputType2, typename InputStructure2, typename InputView2, typename InputImfR2, typename InputImfC2,
+ class Operator,
+ Backend backend
+ >
+ Matrix<
+ typename Operator::D3, structures::General, Density::Dense,
+ view::Functor< std::function< void( InputType1 &, const size_t, const size_t ) > >,
+ imf::Id, imf::Id,
+ backend
+ >
+ outer(
+ const Vector< InputType1, InputStructure1, Density::Dense, InputView1, InputImfR1, InputImfC1, backend > &x,
+ const Vector< InputType2, InputStructure2, Density::Dense, InputView2, InputImfR2, InputImfC2, backend > &y,
+ const Operator &mul = Operator(),
+		const std::enable_if_t< // was `typename std::enable_if<` without `::type` — SFINAE guard never fired; now consistent with sibling overloads
+ alp::is_operator< Operator >::value &&
+ ! alp::is_object< InputType1 >::value &&
+ ! alp::is_object< InputType2 >::value
+ > * const = nullptr
+ ) {
+ (void) x;
+ (void) y;
+ (void) mul;
+		return UNSUPPORTED; // FIXME(review): declared return type is Matrix<...>, not RC — this cannot compile
+ }
+
+ /**
+ * Returns a view over the general rank-1 matrix computed with the outer product.
+ * Version for the case when input vectors are the same vector,
+ * which results in a symmetric matrix.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType, typename InputStructure, typename InputView, typename InputImfR, typename InputImfC,
+ class Operator,
+ Backend backend
+ >
+ Matrix<
+ typename Operator::D3,
+ typename std::conditional<
+ grb::utils::is_complex< typename Operator::D3 >::value,
+ alp::structures::Hermitian,
+ alp::structures::Symmetric
+ >::type,
+ Density::Dense,
+ view::Functor< std::function< void( typename Operator::D3 &, const size_t, const size_t ) > >,
+ imf::Id, imf::Id,
+ backend
+ >
+ outer(
+ const Vector< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &x,
+ const Operator &mul = Operator(),
+ const std::enable_if_t<
+ alp::is_operator< Operator >::value &&
+ !alp::is_object< InputType >::value
+ > * const = nullptr
+ ) {
+ (void) x;
+ (void) mul;
+		return UNSUPPORTED; // FIXME(review): declared return type is Matrix<...>, not RC — this cannot compile
+ }
+ /**
+ * @}
+ */
+
+} // namespace alp
+
+#endif // end _H_ALP_BLAS3_BASE
diff --git a/include/alp/base/collectives.hpp b/include/alp/base/collectives.hpp
new file mode 100644
index 000000000..1ac2e87eb
--- /dev/null
+++ b/include/alp/base/collectives.hpp
@@ -0,0 +1,259 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman & J. M. Nash
+ * @date 20th of February, 2017
+ */
+
+#ifndef _H_ALP_COLL_BASE
+#define _H_ALP_COLL_BASE
+
+#include
+#include
+#include
+
+
+namespace alp {
+
+ /**
+ * A static class defining various collective operations on scalars. This
+ * class is templated in terms of the backends that are implemented-- each
+ * implementation provides its own mechanisms to handle collective
+ * communications. These are required for users employing alp::eWiseLambda,
+ * or for users who perform explicit SPMD programming.
+ */
+ template< enum Backend implementation >
+ class collectives {
+
+ private:
+ /** Disallow creating an instance. */
+ collectives() {}
+
+ public:
+ /**
+ * Schedules an allreduce operation of a single object of type IOType per
+ * process. The allreduce shall be complete by the end of the call. This is a
+ * collective graphBLAS operation. After the collective call finishes, each
+ * user process will locally have available the allreduced value.
+ *
+ * Since this is a collective call, there are \a P values \a inout spread over
+ * all user processes. Let these values be denoted by \f$ x_s \f$, with
+ * \f$ s \in \{ 0, 1, \ldots, P-1 \}, \f$ such that \f$ x_s \f$ equals the
+ * argument \a inout on input at the user process with ID \a s. Let
+ * \f$ \pi:\ \{ 0, 1, \ldots, P-1 \} \to \{ 0, 1, \ldots, P-1 \} \f$ be a
+ * bijection, some unknown permutation of the process ID. This permutation is
+ * must be fixed for any given combination of GraphBLAS implementation and value
+ * \a P. Let the binary operator \a op be denoted by \f$ \odot \f$.
+ *
+ * This function computes \f$ \odot_{i=0}^{P-1} x_{\pi(i)} \f$ and writes the
+ * exact same result to \a inout at each of the \a P user processes.
+ *
+ * In summary, this means 1) this operation is coherent across all processes and
+ * produces bit-wise equivalent output on all user processes, and 2) the result
+ * is reproducible across different runs using the same input and \a P. Yet it
+ * does \em not mean that the order of addition is fixed.
+ *
+ * Since each user process supplies but one value, there is no difference
+ * between a reduce-to-the-left versus a reduce-to-the-right (see alp::reducel
+ * and alp::reducer).
+ *
+ * @tparam descr The GraphBLAS descriptor.
+ * Default is alp::descriptors::no_operation.
+ * @tparam Operator Which operator to use for reduction.
+ * @tparam IOType The type of the to-be reduced value.
+ *
+ * @param[in,out] inout On input: the value at the calling process to be
+ * reduced. On output: the reduced value.
+ * @param[in] op The associative operator to reduce by.
+ *
+ * \note If \a op is commutative, the implementation is free to employ a different
+ * allreduce algorithm, as long as it is documented well enough so that
+ * its cost can be quantified.
+ *
+ * @returns alp::SUCCESS When the operation succeeds as planned.
+ * @returns alp::PANIC When the communication layer unexpectedly fails. When
+ * this error code is returned, the library enters an
+ * undefined state.
+ *
+ * \parblock
+ * \par Valid descriptors:
+ * -# alp::descriptors::no_operation
+ * -# alp::descriptors::no_casting
+ * Any other descriptors will be ignored.
+ * \endparblock
+ *
+ * \parblock
+ * \par Performance semantics:
+ * -# Problem size N: \f$ P * \mathit{sizeof}(\mathit{IOType}) \f$
+ * -# local work: \f$ N*Operator \f$ ;
+ * -# transferred bytes: \f$ N \f$ ;
+ * -# BSP cost: \f$ Ng + N*Operator + l \f$;
+ * \endparblock
+ */
+ template< Descriptor descr = descriptors::no_operation, typename Operator, typename IOType >
+ static RC allreduce( IOType & inout, const Operator op = Operator() ) {
+ (void)inout;
+ (void)op;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Schedules a reduce operation of a single object of type IOType per process.
+ * The reduce shall be complete by the end of the call. This is a collective
+ * graphBLAS operation. The BSP costs are as for the PlatformBSP #reduce.
+ *
+ * Since this is a collective call, there are \a P values \a inout spread over
+ * all user processes. Let these values be denoted by \f$ x_s \f$, with
+ * \f$ s \in \{ 0, 1, \ldots, P-1 \}, \f$ such that \f$ x_s \f$ equals the
+ * argument \a inout on input at the user process with ID \a s. Let
+ * \f$ \pi:\ \{ 0, 1, \ldots, P-1 \} \to \{ 0, 1, \ldots, P-1 \} \f$ be a
+ * bijection, some unknown permutation of the process ID. This permutation
+ * must be fixed for any given combination of GraphBLAS implementation and value
+ * \a P. Let the binary operator \a op be denoted by \f$ \odot \f$.
+ *
+ * This function computes \f$ \odot_{i=0}^{P-1} x_{\pi(i)} \f$ and writes the
+ * result to \a inout at the user process with ID \a root.
+ *
+ * In summary, the result is reproducible across different runs using the
+ * same input and \a P. Yet it does \em not mean that the order of addition is
+ * fixed.
+ *
+ * Since each user process supplies but one value, there is no difference
+ * between a reduce-to-the-left versus a reduce-to-the-right (see alp::reducel
+ * and alp::reducer).
+ *
+ * @tparam descr The GraphBLAS descriptor.
+ * Default is alp::descriptors::no_operation.
+ * @tparam Operator Which operator to use for reduction.
+ * @tparam IOType The type of the to-be reduced value.
+ *
+ * @param[in,out] inout On input: the value at the calling process to be
+ * reduced. On output at process \a root: the reduced value.
+ * On output as non-root processes: same value as on input.
+ * @param[in] op The associative operator to reduce by.
+ * @param[in] root Which process should hold the reduced value. This
+ * number must be larger or equal to zero, and must be
+ * strictly smaller than the number of user processes
+ * \a P.
+ *
+ * @return SUCCESS When the function completes successfully.
+ * @return ILLEGAL When root is larger or equal than \a P. When this code is
+ * returned, the state of the GraphBLAS shall be as though
+ * this call was never made.
+ * @return PANIC When an unmitigable error within the GraphBLAS occurs.
+ * Upon returning this error, the GraphBLAS enters an
+ * undefined state.
+ *
+ * \note If \a op is commutative, the implementation is free to employ a different
+ * reduce algorithm, as long as it is documented well enough so that
+ * its cost can be quantified.
+ *
+ * \parblock
+ * \par Performance semantics:
+ * -# Problem size N: \f$ P * \mathit{sizeof}(\mathit{IOType}) \f$
+ * -# local work: \f$ N*Operator \f$ ;
+ * -# transferred bytes: \f$ N \f$ ;
+ * -# BSP cost: \f$ Ng + N*Operator + l \f$;
+ * \endparblock
+ */
+ template< Descriptor descr = descriptors::no_operation, typename Operator, typename IOType >
+ static RC reduce( IOType & inout, const size_t root = 0, const Operator op = Operator() ) {
+ (void)inout;
+ (void)op;
+ (void)root;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Schedules a broadcast operation of a single object of type IOType per
+ * process. The broadcast shall be complete by the end of the call. This is
+ * a collective graphBLAS operation. The BSP costs are as for the PlatformBSP
+ * #broadcast.
+ *
+ * @tparam IOType The type of the to-be broadcast value.
+ *
+ * @param[in,out] inout On input at process \a root: the value to be
+ * broadcast.
+ * On input at non-root processes: initial values are
+ * ignored.
+ * On output at process \a root: the input value remains
+ * unchanged.
+ * On output at non-root processes: the same value held
+ * at process ID \a root.
+ * @param[in] root The user process which is to send out the given input
+ * value \a inout so that it becomes available at all
+ * \a P user processes. This value must be larger or
+ * equal to zero and must be smaller than the total
+ * number of user processes \a P.
+ *
+ * @return SUCCESS On the successful completion of this function.
+ * @return ILLEGAL When \a root is larger or equal to \a P. If this code is
+ * returned, it shall be as though the call to this function
+ * had never occurred.
+ * @return PANIC When the function fails and the library enters an
+ * undefined state.
+ *
+ * \parblock
+ * \par Performance semantics: serial
+ * -# Problem size N: \f$ \mathit{sizeof}(\mathit{IOType}) \f$
+ * -# local work: \f$ 0 \f$ ;
+ * -# transferred bytes: \f$ NP \f$ ;
+ * -# BSP cost: \f$ NPg + l \f$;
+ * \endparblock
+ *
+ * \par Performance semantics: two phase
+ * -# Problem size N: \f$ \mathit{sizeof}(\mathit{IOType}) \f$
+ * -# local work: \f$ 0 \f$ ;
+ * -# transferred bytes: \f$ 2N \f$ ;
+ * -# BSP cost: \f$ 2(Ng + l) \f$;
+ * \endparblock
+ *
+ * \par Performance semantics: two level tree
+ * -# Problem size N: \f$ \mathit{sizeof}(\mathit{IOType}) \f$
+ * -# local work: \f$ 0 \f$ ;
+ * -# transferred bytes: \f$ 2\sqrt{P}N \f$ ;
+ * -# BSP cost: \f$ 2(\sqrt{P}Ng + l) \f$;
+ * \endparblock
+ */
+ template< typename IOType >
+ static RC broadcast( IOType &inout, const size_t root = 0 ) {
+ (void)inout;
+ (void)root;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Broadcast on an array of \a IOType.
+ *
+ * The above documentation applies with \a size times sizeof(IOType)
+ * substituted in.
+ */
+ template< Descriptor descr = descriptors::no_operation, typename IOType >
+ static RC broadcast( IOType * inout, const size_t size, const size_t root = 0 ) {
+ (void)inout;
+ (void)size;
+ (void)root;
+ return UNSUPPORTED;
+ }
+
+ }; // end class ``collectives''
+
+} // end namespace alp
+
+#endif // end _H_ALP_COLL_BASE
+
diff --git a/include/alp/base/config.hpp b/include/alp/base/config.hpp
new file mode 100644
index 000000000..5a5dcc0fe
--- /dev/null
+++ b/include/alp/base/config.hpp
@@ -0,0 +1,309 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 8th of August, 2016
+ */
+
+#ifndef _H_ALP_CONFIG_BASE
+#define _H_ALP_CONFIG_BASE
+
+#include <cstddef> //size_t
+#ifndef _ALP_NO_STDIO
+ #include <iostream> //std::cout
+#endif
+#include
+
+#include
+#include <unistd.h> //sysconf
+
+#include
+
+
+// if the user did not define _ALP_BACKEND, set it to the default sequential
+// implementation
+#ifndef _ALP_BACKEND
+ #define _ALP_BACKEND reference
+#endif
+
+// if the user did not define _ALP_SECONDARY_BACKEND, set it to the default
+// sequential implementation. This setting may be used by other backends for
+// backend-specific purposes. For example, a parallel backend may use this
+// setting to control to which sequential backend it dispatches sequential
+// work.
+#ifndef _ALP_SECONDARY_BACKEND
+ #define _ALP_SECONDARY_BACKEND reference
+#endif
+
+/**
+ * The main GraphBLAS namespace.
+ *
+ * All GraphBLAS functions and objects are defined within.
+ */
+namespace alp {
+
+ /** Contains compile-time configuration constants. */
+ namespace config {
+
+ /** The default backend to be selected for an end user. */
+ static constexpr alp::Backend default_backend = _ALP_BACKEND;
+
+ /** The cache line size, in bytes. */
+ class CACHE_LINE_SIZE {
+
+ private:
+ /**
+ * The cache line size in bytes. Update this value at compile time to
+ * reflect the target architecture.
+ */
+ static constexpr size_t bytes = 64;
+
+ public:
+ /**
+ * @return The cache line size in bytes.
+ * @see alp::config::CACHE_LINE_SIZE::bytes
+ */
+ static constexpr size_t value() {
+ return bytes;
+ }
+ };
+
+ /** The SIMD size, in bytes. */
+ class SIMD_SIZE {
+
+ private:
+ /**
+ * The SIMD size, in bytes. Update this value at compile time to reflect
+ * the target architecture.
+ */
+ static constexpr size_t bytes = 32;
+
+ public:
+ /**
+ * @return The SIMD size in bytes.
+ * @see alp::config::SIMD_SIZE::bytes
+ */
+ static constexpr size_t value() {
+ return bytes;
+ }
+ };
+
+ /** How many elements of a given data type fit into a SIMD register. */
+ template< typename T >
+ class SIMD_BLOCKSIZE {
+ public:
+ /**
+ * Calculates the block size this operator should use.
+ *
+ * \warning This rounds down. If instances of T are too large, this could
+ * result in a zero value. See #value for a correction.
+ */
+ static constexpr size_t unsafe_value() {
+ return SIMD_SIZE::value() / sizeof( T );
+ }
+
+ /**
+ * The maximum of one and the number of elements that fit into a single
+ * cache line.
+ */
+ static constexpr size_t value() {
+ return unsafe_value() > 0 ? unsafe_value() : 1;
+ }
+ };
+
+ /**
+ * How many hardware threads the operating system exposes.
+ *
+ * \warning On contemporary x86-based hardware, the reported number by
+ * value() will include that of each hyper-thread. This number
+ * thus does not necessarily equal the number of cores available.
+ */
+ class HARDWARE_THREADS {
+ public:
+ /**
+ * Returns the number of online hardware threads as reported by the OS.
+ *
+ * \warning This is a UNIX system call.
+ *
+ * @returns The number of hardware threads currently online. The return
+ * type is specified by the UNIX standard.
+ */
+ static long value() {
+ return sysconf( _SC_NPROCESSORS_ONLN );
+ }
+ };
+
+ /** Benchmarking defaults. */
+ class BENCHMARKING {
+ public:
+ /** The default number of inner repetitions. */
+ static constexpr size_t inner() {
+ return 1;
+ }
+
+ /** The default number of outer repetitions. */
+ static constexpr size_t outer() {
+ return 10;
+ }
+ };
+
+ /** Memory defaults. */
+ class MEMORY {
+ public:
+
+ /** The private L1 data cache size, in bytes. */
+ static constexpr size_t l1_cache_size() {
+ return 32768;
+ }
+
+ /** What is considered a lot of memory, in 2-log of bytes. */
+ static constexpr size_t big_memory() {
+ return 31;
+ } // 2GB
+
+ /**
+ * The memory speed under random accesses of 8-byte words.
+ *
+ * @returns The requested speed in MiB/s/process.
+ *
+ * @note The default value was measured on a two-socket Ivy Bridge node
+ * with 128GB quad-channel DDR4 memory at 1600 MHz per socket.
+ *
+ * @note In the intended use of these variables, it is the ratio between
+ * #stream_memspeed and #random_access_memspeed that matters. While
+ * untested, it is reasonable to think the ratios do not change too
+ * much between architectures. Nevertheless, for best results, these
+ * numbers are best set to benchmarked values on the deployment
+ * hardware.
+ */
+ static constexpr double random_access_memspeed() {
+ return 147.298;
+ }
+
+ /**
+ * The memory speed under a limited number of streams of uncached data.
+ *
+ * @returns The requested speed in MiB/s/process.
+ *
+ * @note The default value was measured on a two-socket Ivy Bridge node
+ * with 128GB quad-channel DDR4 memory at 1600 MHz per socket.
+ *
+ * @note In the intended use of these variables, it is the ratio between
+ * #stream_memspeed and #random_access_memspeed that matters. While
+ * untested, it is reasonable to think the ratios do not change too
+ * much between architectures. Nevertheless, for best results, these
+ * numbers are best set to benchmarked values on the deployment
+ * hardware.
+ */
+ static constexpr double stream_memspeed() {
+ return 1931.264;
+ }
+
+ /**
+ * Prints memory usage info to stdout, but only for big memory allocations.
+ *
+ * @returns true if and only if this function printed information to stdout.
+ */
+ static bool report( const std::string prefix, const std::string action, const size_t size, const bool printNewline = true ) {
+#ifdef _ALP_NO_STDIO
+ (void)prefix;
+ (void)action;
+ (void)size;
+ (void)printNewline;
+ return false;
+#else
+ constexpr size_t big =
+ #ifdef _DEBUG
+ true;
+ #else
+ ( 1ul << big_memory() );
+ #endif
+ if( size >= big ) {
+ std::cout << "Info: ";
+ std::cout << prefix << " ";
+ std::cout << action << " ";
+ if( sizeof( size_t ) * 8 > 40 && ( size >> 40 ) > 2 ) {
+ std::cout << ( size >> 40 ) << " TB of memory";
+ } else if( sizeof( size_t ) * 8 > 30 && ( size >> 30 ) > 2 ) {
+ std::cout << ( size >> 30 ) << " GB of memory";
+ } else if( sizeof( size_t ) * 8 > 20 && ( size >> 20 ) > 2 ) {
+ std::cout << ( size >> 20 ) << " MB of memory";
+ } else if( sizeof( size_t ) * 8 > 10 && ( size >> 10 ) > 2 ) {
+ std::cout << ( size >> 10 ) << " kB of memory";
+ } else {
+ std::cout << size << " bytes of memory";
+ }
+ if( printNewline ) {
+ std::cout << ".\n";
+ }
+ return true;
+ }
+ return false;
+#endif
+ }
+ };
+
+ /**
+ * What data type should be used to store row indices.
+ *
+ * Some use cases may require this to be set to size_t -- others may
+ * do with (much) smaller data types instead.
+ *
+ * \note The data type for indices of general arrays is not configurable. This
+ * set of implementations use size_t for those.
+ */
+ typedef unsigned int RowIndexType;
+
+ /**
+ * What data type should be used to store column indices.
+ *
+ * Some use cases may require this to be set to size_t -- others may
+ * do with (much) smaller data types instead.
+ *
+ * \note The data type for indices of general arrays is not configurable. This
+ * set of implementations use size_t for those.
+ */
+ typedef unsigned int ColIndexType;
+
+ /**
+ * What data type should be used to refer to an array containing nonzeroes.
+ *
+ * Some use cases may require this to be set to size_t -- others may
+ * do with (much) smaller data types instead.
+ *
+ * \note The data type for indices of general arrays is not configurable. This
+ * set of implementations use size_t for those.
+ */
+ typedef size_t NonzeroIndexType;
+
+ /**
+ * What data type should be used to store vector indices.
+ *
+ * Some use cases may require this to be set to size_t -- others may
+ * do with (much) smaller data types instead.
+ *
+ * \note The data type for indices of general arrays is not configurable. This
+ * set of implementations use size_t for those.
+ */
+ typedef unsigned int VectorIndexType;
+
+ } // namespace config
+
+} // namespace alp
+
+#endif // end _H_ALP_CONFIG_BASE
diff --git a/include/alp/base/exec.hpp b/include/alp/base/exec.hpp
new file mode 100644
index 000000000..c011b7221
--- /dev/null
+++ b/include/alp/base/exec.hpp
@@ -0,0 +1,232 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 17th of April, 2017
+ */
+
+#ifndef _H_ALP_EXEC_BASE
+#define _H_ALP_EXEC_BASE
+
+#ifndef _ALP_NO_STDIO
+#include
+#endif
+#include
+#include
+
+#include
+#include
+
+
+namespace alp {
+
+ /**
+ * The various ways in which the #Launcher can be used
+ * to execute a GraphBLAS program.
+ *
+ * \warning An implementation may require different linker commands
+ * when using different modes. This is OK, since a call to
+ * the #Launcher is required to be quite different
+ * depending on which mode is used. The portability is in
+ * the GraphBLAS program being launched-- that one should
+ * never change depending on whichever mode it is used.
+ */
+ enum EXEC_MODE {
+
+ /**
+ * Automatic mode. The #Launcher can spawn user processes
+ * which will execute a given program.
+ */
+ AUTOMATIC = 0,
+
+ /**
+ * Manual mode. The user controls \a nprocs user processes
+ * which together should execute a given program, by, for
+ * example, using the #Launcher.
+ */
+ MANUAL,
+
+ /**
+ * When running from an MPI program. The user controls
+ * \a nprocs MPI programs, which, together, should execute
+ * a given GraphBLAS program.
+ */
+ FROM_MPI
+
+ };
+
+ /**
+ * Allows an auxiliary program to run any GraphBLAS program. Input data may be
+ * passed through a user-defined type. Output data will be retrieved via the
+ * same type. For implementations that support multiple user processes, the
+ * caller may explicitly set the process ID and total number of user processes.
+ *
+ * The intended use is to `just call' alp::exec which should, in its most
+ * trivial form, compile regardless of which backend is selected.
+ *
+ * @tparam mode Which #EXEC_MODE the Launcher should adhere to.
+ * @tparam implementation Which GraphBLAS implementation is to be used.
+ */
+ template< enum EXEC_MODE mode, enum Backend implementation >
+ class Launcher {
+
+ public :
+
+ /**
+ * Constructs a new Launcher. This constructor is a collective
+ * call; all \a nprocs processes that form a single Launcher
+ * group must make a call to this constructor at roughly the
+ * same time. There is an implementation-defined time-out for
+ * the creation of a Launcher group.
+ *
+ * @param[in] process_id The user process ID of the calling process.
+ * The value must be larger or equal to 0. This
+ * value must be strictly smaller than \a nprocs.
+ * This value must be unique to the calling
+ * process within this collective call across
+ * \em all \a nprocs user processes. This number
+ * \em must be strictly smaller than \a nprocs.
+ * Optional: the default is 0.
+ * @param[in] nprocs The total number of user processes making a
+ * collective call to this function. Optional: the
+ * default is 1.
+ * @param[in] hostname The hostname of one of the user processes.
+ * Optional: the default is `localhost'.
+ * @param[in] port A free port number at \a hostname. This port
+ * will be used for TCP connections to \a hostname
+ * if and only if \a nprocs is larger than one.
+ * Optional: the default value is `0'.
+ *
+ * @throws invalid_argument If #nprocs is zero.
+ * @throws invalid_argument If #process_id is greater than or
+ * equal to \a nprocs.
+ *
+ * \note An implementation may define further constraints on
+ * the input arguments, such as, obviously, on \a hostname
+ * and \a port, but also on \a nprocs and, as a result, on
+ * \a process_id.
+ */
+ Launcher( const size_t process_id = 0, // user process ID
+ const size_t nprocs = 1, // total number of user processes
+ const std::string hostname = "localhost", // one of the user process hostnames
+ const std::string port = "0" // a free port at hostname
+ ) { // standard does not specify any constraints on hostname and port
+ // so accept (and ignore) anything
+ (void)hostname; (void)port;
+
+#ifndef _ALP_NO_EXCEPTIONS
+ // sanity checks on process_id and nprocs
+ if( nprocs == 0 ) { throw std::invalid_argument( "Total number of user "
+ "processes must be "
+ "strictly larger than "
+ "zero." ); }
+ if( process_id >= nprocs ) {
+ throw std::invalid_argument( "Process ID must be strictly smaller than "
+ "total number of user processes." );
+ }
+#endif
+} // end constructor
+
+/**
+ * Executes the given GraphBLAS program. This function, depending on whether
+ * GraphBLAS is compiled in automatic or in manual mode, will either
+ * \em spawn the maximum number of available user processes or will connect
+ * exactly \a nprocs existing processes, respectively, to execute the given
+ * \a alp_program.
+ *
+ * This is a collective function call.
+ *
+ * @tparam T The type of the data to pass to the GraphBLAS program.
+ * @tparam U The type of the output data to pass back to the user.
+ *
+ * @param[in] alp_program User GraphBLAS program to be executed.
+ * @param[in] data_in Input data of user-defined type \a T.
+ * When in automatic mode, the data will only be
+ * available at user process 0 only. When in
+ * manual mode, the data will be available to
+ * this user process (with the below given
+ * \a process_id) only.
+ * @param[out] data_out Output data of user-defined type \a U. The output
+ * data should be available at user process with ID
+ * zero.
+ * @param[in] broadcast Whether the input should be broadcast from user
+ * process 0 to all other user processes. Optional;
+ * the default value is \a false.
+ *
+ * @return SUCCESS If the execution proceeded as intended.
+ * @return PANIC If an unrecoverable error was encountered while trying to
+ * execute the given GraphBLAS program.
+ *
+ * \warning An implementation can define further constraints on the validity
+ * of input arguments. The most obvious is that implementations
+ * supporting only one user process will not accept \a nprocs larger
+ * than 1.
+ *
+ * All aforementioned default values shall always be legal.
+ */
+template< typename T, typename U >
+RC exec( void ( *alp_program )( const T &, U & ), // user GraphBLAS program
+ const T & data_in,
+ U & data_out, // input & output data
+ const bool broadcast = false ) const {
+ (void)alp_program;
+ (void)data_in;
+ (void)data_out;
+ (void)broadcast;
+ // stub implementation, should be overridden by specialised implementation,
+ // so return error code
+ return PANIC;
+}
+
+/**
+ * Variable size version of the above function.
+ *
+ * @param[in] broadcast Whether the input should be broadcast from user
+ * process 0 to all other user processes. Optional;
+ * the default value is \a false. This will let user
+ * processes with ID larger than zero allocate
+ * \a in_size bytes of memory into which the data at
+ * process 0 will be copied.
+ *
+ * \todo more documentation
+ */
+template< typename U >
+RC exec( void ( *alp_program )( const void *, const size_t, U & ), const void * data_in, const size_t in_size, U & data_out, const bool broadcast = false ) const {
+ (void)alp_program;
+ (void)data_in;
+ (void)in_size;
+ (void)data_out;
+ (void)broadcast;
+ return PANIC;
+}
+
+/**
+ * Releases all GraphBLAS resources. After a call to this function, no
+ * GraphBLAS library functions may be called any longer.
+ *
+ * @return SUCCESS A call to this function may never fail.
+ */
+static RC finalize() {
+ return PANIC;
+}
+}
+; // end class `Launcher'
+
+} // end namespace ``alp''
+
+#endif // end _H_ALP_EXEC_BASE
diff --git a/include/alp/base/init.hpp b/include/alp/base/init.hpp
new file mode 100644
index 000000000..b40093cf2
--- /dev/null
+++ b/include/alp/base/init.hpp
@@ -0,0 +1,183 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 24th of January, 2017
+ */
+
+#ifndef _H_ALP_INIT_BASE
+#define _H_ALP_INIT_BASE
+
+#include
+
+#include "config.hpp"
+
+namespace alp {
+
+ /**
+ * Initialises the calling user process.
+ *
+ * If the backend supports multiple user processes, the user can invoke this
+ * function with \a P equal to one or higher; if the backend supports only a
+ * single user process, then \a P must equal one.
+ * The value for the user process ID \a s must be larger or equal to zero and
+ * must be strictly smaller than \a P. If \a P > 1, each user process must
+ * call this function collectively, each user process should pass the same
+ * value for \a P, and each user process should pass a unique value for \a s
+ * amongst all \a P collective calls made.
+ *
+ * An implementation may define that additional data is required for a call to
+ * this function to complete successfully. Such data may be passed via the
+ * final argument to this function, \a implementation_data.
+ *
+ * If the implementation does not support multiple user processes, then a
+ * value for \a implementation_data shall not be required. In particular, a
+ * call to this function with an empty parameter list shall then be legal
+ * and infer the following default arguments: zero for \a s, one for \a P,
+ * and \a NULL for \a implementation_data. When such an implementation is
+ * requested to initialise multiple user processes, the alp::UNSUPPORTED
+ * error code shall be returned.
+ *
+ * A call to this function must be matched with a call to alp::finalize().
+ * After a successful call to this function, a new call to alp::init() without
+ * first calling alp::finalize() shall incur undefined behaviour. The
+ * construction of GraphBLAS containers without a preceding successful call
+ * to alp::init() will result in invalid GraphBLAS objects. Any valid
+ * GraphBLAS containers will become invalid after a call to alp::finalize().
+ * Any use of GraphBLAS functions on invalid containers will result in
+ * undefined behaviour.
+ *
+ * @tparam backend Which GraphBLAS backend this call to init initialises.
+ *
+ * @param[in] s The ID of this user process.
+ * @param[in] P The total number of user processes.
+ * @param[in] implementation_data Any implementation-defined data structure
+ * required for successful completion of this
+ * call.
+ *
+ * \note For a pure MPI implementation, for instance, \a implementation_data
+ * may be a pointer to the MPI communicator corresponding to these user
+ * processes.
+ *
+ * \note The implementations based on PlatformBSP require direct passing of
+ * the \a bsp_t corresponding to the BSP context of the user processes;
+ * this is legal since the PlatformBSP specification defines the
+ * \a bsp_t type as a void pointer.
+ *
+ * @return SUCCESS If the initialisation was successful.
+ * @return UNSUPPORTED When the implementation does not support multiple
+ * user processes (\a P larger than 1). After a call to
+ * this function exits with this error code the library
+ * state shall be as though the call never were made.
+ * @return PANIC If this function fails, the state of this GraphBLAS
+ * implementation becomes undefined.
+ *
+ * \note There is no argument checking. If \a s is larger or equal to \a P,
+ * undefined behaviour occurs. If \a implementation_data was invalid
+ * or corrupted, undefined behaviour occurs.
+ *
+ * \par Performance semantics
+ * None. Implementations are encouraged to specify the complexity of
+ * their implementation of this function in terms of \a P.
+ *
+ * \note Compared to the GraphBLAS C specification, this function lacks a
+ * choice whether to execute in `blocking' or `non-blocking' mode.
+ * All functions in the Huawei GraphBLAS are blocking. A choice
+ * between blocking and non-blocking execution may be added later.
+ * \note Note that a blocking mode is a valid implementation of a non-
+ * blocking mode, as specified in the GraphBLAS C API. Therefore
+ * this specification will still yield a valid implementation of
+ * the C API when properly wrapped.
+ * \note Non-blocking mode with clear performance semantics are possible via
+ * carefully designed algorithmic skeletons. This is future work.
+ * \note This specification allows for alp::init() to be called multiple
+ * times from the same process and the same thread, as long as all the
+ * above requirements are met at each call. The parameters \a s and
+ * \a P (and \a implementation_data) may differ each time.
+ * \note This is an extension of the GraphBLAS C API, which only allows a
+ * single initialisation and a single matching finalisation.
+ * \note The GraphBLAS C API does not have the notion of user processes. We
+ * believe this notion is necessary to properly integrate into parallel
+ * frameworks, and also to affect proper and efficient parallel I/O.
+ */
+ template< enum Backend backend = config::default_backend >
+ RC init( const size_t s, const size_t P, void * const implementation_data ) {
+ (void)s;
+ (void)P;
+ (void)implementation_data;
+ return PANIC;
+ }
+
+ /**
+ * Implementations must ensure that initialisation without explicitly given
+ * values regarding user processes etc. should still result in a successful
+ * initialisation in all cases except where it cannot initialise due to
+ * external factors.
+ * A call to this function could, for instance, reduce to a full alp::init()
+ * while using the default parameters 0 for \a s, 1 for \a P, and \a NULL for
+ * \a implementation_data:
+ * \code
+ * return init< backend >( 0, 1, NULL );
+ * \endcode
+ *
+ * @tparam backend The backend implementation to initialise.
+ *
+ * @return SUCCESS If the initialisation was successful.
+ * @return PANIC If this function fails, the state of this GraphBLAS
+ * implementation becomes undefined.
+ */
+ template< enum Backend backend = config::default_backend >
+ RC init() {
+ return alp::init< backend >( 0, 1, NULL );
+ }
+
+ /**
+ * Finalises a graphBLAS context opened by the last call to alp::init().
+ *
+ * This function must be called collectively and must follow a call to
+ * alp::init(). After successful execution of this function, a new call
+ * to alp::init() may be made.
+ * After a call to this function, any graphBLAS objects that remain in scope
+ * are invalid. The only graphBLAS functions on invalid containers which
+ * shall \em not incur undefined behaviour are their destructors.
+ *
+ * \warning Invalid GraphBLAS containers will remain invalid no matter if a
+ * next call to alp::init() is made.
+ *
+ * @tparam backend Which GraphBLAS backend this call to init initialises.
+ *
+ * @return SUCCESS If the initialisation was successful.
+ * @return PANIC If this function fails, the state of the GraphBLAS
+ * implementation becomes undefined. This means none of its
+ * functions should be called during the remainder program
+ * execution; in particular this means a new call to
+ * alp::init() will not remedy the situation.
+ *
+ * \par Performance semantics
+ * None. Implementations are encouraged to specify the complexity of
+ * their implementation of this function in terms of the parameter
+ * \a P the matching call to alp::init() was called with.
+ */
+ template< enum Backend backend = config::default_backend >
+ RC finalize() {
+ return PANIC;
+ }
+
+} // namespace alp
+
+#endif // end _H_ALP_INIT_BASE
diff --git a/include/alp/base/internalops.hpp b/include/alp/base/internalops.hpp
new file mode 100644
index 000000000..1f9592378
--- /dev/null
+++ b/include/alp/base/internalops.hpp
@@ -0,0 +1,3178 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 8th of August, 2016
+ */
+
+#ifndef _H_ALP_INTERNAL_OPERATORS_BASE
+#define _H_ALP_INTERNAL_OPERATORS_BASE
+
+#include
+#include
+
+#include
+#include
+#include
+
+#include "config.hpp"
+
+
+namespace alp {
+
+ namespace operators {
+
+ /** Core implementations of the standard operators in #alp::operators. */
+ namespace internal {
+
+ /**
+ * Standard argmin operator.
+ *
+ * Takes std::pair< index, value > domains only.
+ *
+ * Given two pairs (i1,v1), (i2,v2)
+ * - returns (i1,v1) if v1
+ class argmin {
+
+ static_assert( std::is_integral< IType >::value,
+ "Argmin operator may only be constructed using integral index "
+ "types." );
+
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef std::pair< IType, VType > left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef std::pair< IType, VType > right_type;
+
+ /** Alias to the output data type. */
+ typedef std::pair< IType, VType > result_type;
+
+ /** Whether this operator has an inplace foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an inplace foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+ * Out-of-place application of the operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ */
+					static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+						// write the pair with the smaller value to c; ties go to the
+						// right-hand input, consistent with foldr/foldl tie-breaking
+						c->first = ( a->second < b->second ) ? a->first : b->first;
+						c->second = ( a->second < b->second ) ? a->second : b->second;
+					}
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * dubs as the output memory area.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ if( a->second < c->second ) {
+ c->first = a->first;
+ c->second = a->second;
+ }
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * dubs as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ if( b->second <= c->second ) {
+ c->first = b->first;
+ c->second = b->second;
+ }
+ }
+ };
+
+ /**
+ * Standard argmax operator.
+ *
+ * Takes std::pair< index, value > domains only.
+ *
+ * Given two pairs (i1,v1), (i2,v2)
+ * - returns (i1,v1) if v1>v2, OR
+ * - returns (i2,v2) otherwise.
+ */
+			template< typename IType, typename VType >
+			class argmax {
+
+				static_assert( std::is_integral< IType >::value,
+					"Argmax operator may only be constructed using integral index "
+					"types." );
+
+				public:
+					/** Alias to the left-hand input data type. */
+					typedef std::pair< IType, VType > left_type;
+
+					/** Alias to the right-hand input data type. */
+					typedef std::pair< IType, VType > right_type;
+
+					/** Alias to the output data type. */
+					typedef std::pair< IType, VType > result_type;
+
+					/** Whether this operator has an inplace foldl. */
+					static constexpr bool has_foldl = true;
+
+					/** Whether this operator has an inplace foldr. */
+					static constexpr bool has_foldr = true;
+
+					/**
+					 * Whether this operator is \em mathematically associative; that is,
+					 * associative when assuming equivalent data types for \a IN1, \a IN2,
+					 * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+					 */
+					static constexpr bool is_associative = true;
+
+					/**
+					 * Whether this operator is \em mathematically commutative; that is,
+					 * commutative when assuming equivalent data types for \a IN1, \a IN2,
+					 * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+					 */
+					static constexpr bool is_commutative = true;
+
+					/**
+					 * Out-of-place application of the operator.
+					 *
+					 * @param[in]  a The left-hand side input. Must be pre-allocated and initialised.
+					 * @param[in]  b The right-hand side input. Must be pre-allocated and initialised.
+					 * @param[out] c The output. Must be pre-allocated.
+					 */
+					static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+						// write the pair with the larger value to c; ties go to the
+						// right-hand input, consistent with foldr/foldl tie-breaking
+						c->first = ( a->second > b->second ) ? a->first : b->first;
+						c->second = ( a->second > b->second ) ? a->second : b->second;
+					}
+
+					/**
+					 * In-place left-to-right folding.
+					 *
+					 * @param[in]     a Pointer to the left-hand side input data.
+					 * @param[in,out] c Pointer to the right-hand side input data. This also
+					 *                  dubs as the output memory area.
+					 */
+					static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+						if( a->second > c->second ) {
+							c->first = a->first;
+							c->second = a->second;
+						}
+					}
+
+					/**
+					 * In-place right-to-left folding.
+					 *
+					 * @param[in,out] c Pointer to the left-hand side input data. This also
+					 *                  dubs as the output memory area.
+					 * @param[in]     b Pointer to the right-hand side input data.
+					 */
+					static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+						if( b->second >= c->second ) {
+							c->first = b->first;
+							c->second = b->second;
+						}
+					}
+			};
+
+ /**
+ * Standard left-hand side assignment operator.
+ *
+ * Takes binary input, but ignores the right-hand side input and simply
+ * assigns the left-hand side input to the output variable.
+ *
+ * Assumes native availability of = on the given data types, or assumes
+ * the relevant operators are properly overloaded.
+ *
+ * Assumes a binary operator defined using the =-operator in the following
+ * way, is \em associative:
+ * \code
+ * void left_assign( const IN1 x, const IN2 y, OUT &out ) {
+ * (void)y;
+ * out = x;
+ * }
+ * \endcode
+ *
+ * Non-standard or non-matching data types, or non-standard (overloaded) =
+ * operators should be used with caution and may necessitate an explicit
+ * definition as a GraphBLAS operator with the #has_foldl, #has_foldr, and
+ * the other fields, set as required.
+ *
+ * @tparam IN1 The left-hand input data type.
+ * @tparam IN2 The right-hand input data type.
+ * @tparam OUT The output data type.
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class left_assign {
+
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an inplace foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an inplace foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = false;
+
+ /**
+				 * Out-of-place application of the assignment c = a.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ (void)b;
+ *c = static_cast< result_type >( *a );
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * dubs as the output memory area.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ *c = static_cast< result_type >( *a );
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * dubs as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ (void)b;
+ (void)c;
+ }
+ };
+
+ /**
+ * Standard right-hand side assignment operator.
+ *
+ * Takes binary input, but ignores the right-hand side input and simply
+ * assigns the left-hand side input to the output variable.
+ *
+ * Assumes native availability of = on the given data types, or assumes
+ * the relevant operators are properly overloaded.
+ *
+ * Assumes a binary operator defined using the =-operator in the following
+ * way, is \em associative:
+ * \code
+ * void right_assign( const IN1 x, const IN2 y, OUT &out ) {
+ * (void)x;
+ * out = y;
+ * }
+ * \endcode
+ *
+ * Non-standard or non-matching data types, or non-standard (overloaded) =
+ * operators should be used with caution and may necessitate an explicit
+ * definition as a GraphBLAS operator with the #has_foldl, #has_foldr, and
+ * the other fields, set as required.
+ *
+ * @tparam IN1 The left-hand input data type.
+ * @tparam IN2 The right-hand input data type.
+ * @tparam OUT The output data type.
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class right_assign {
+
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an inplace foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an inplace foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = false;
+
+ /**
+				 * Out-of-place application of the assignment c = b.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ (void)a;
+ *c = *b;
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * dubs as the output memory area.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ (void)a;
+ (void)c;
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * dubs as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ *c = static_cast< result_type >( *b );
+ }
+ };
+
+ /**
+ * Left-sided operator that combines an indicator and an identity function
+ * as follows:
+ *
+			 * \f$ z = x \odot y = x \text{ if } y \text{ evaluates true}. \f$
+ *
+			 * If \f$ y \f$ does not evaluate true the operator shall have no effect.
+ */
+ template< typename D1, typename D2, typename D3, enum Backend implementation = config::default_backend >
+ class left_assign_if {
+
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef D1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef D2 right_type;
+
+ /** Alias to the output data type. */
+ typedef D3 result_type;
+
+ /** Whether this operator has an inplace foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an inplace foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+				 * Out-of-place application of the conditional assignment c = a (only if b evaluates true).
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ */
+ static void apply( const D1 * __restrict__ const a, const D2 * __restrict__ const b, D3 * __restrict__ const c ) {
+ if( static_cast< const bool >( *b ) ) {
+ *c = *a;
+ }
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * dubs as the output memory area.
+ */
+ static void foldr( const D1 * __restrict__ const a, D3 * __restrict__ const c ) {
+ if( static_cast< const bool >( *c ) ) {
+ *c = static_cast< D3 >( *a );
+ }
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * dubs as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ */
+ static void foldl( D3 * __restrict__ const c, const D2 * __restrict__ const b ) {
+ if( static_cast< bool >( *b ) ) {
+ *c = static_cast< D3 >( static_cast< D1 >( *c ) );
+ }
+ }
+ };
+
+ /**
+ * Right-sided operator that combines an indicator and an identity function
+ * as follows:
+ *
+			 * \f$ z = x \odot y = y \text{ if } x \text{ evaluates true}. \f$
+ *
+ * If \f$ x \f$ does not evaluate true the operator shall have no effect.
+ */
+ template< typename D1, typename D2, typename D3, enum Backend implementation = config::default_backend >
+ class right_assign_if {
+
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef D1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef D2 right_type;
+
+ /** Alias to the output data type. */
+ typedef D3 result_type;
+
+ /** Whether this operator has an inplace foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an inplace foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+				 * Out-of-place application of the conditional assignment c = b (only if a evaluates true).
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ */
+ static void apply( const D1 * __restrict__ const a, const D2 * __restrict__ const b, D3 * __restrict__ const c ) {
+ if( static_cast< const bool >( *a ) ) {
+ *c = *b;
+ }
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * dubs as the output memory area.
+ */
+ static void foldr( const D1 * __restrict__ const a, D3 * __restrict__ const c ) {
+ if( static_cast< const bool >( *a ) ) {
+ *c = static_cast< D3 >( static_cast< D2 >( *c ) );
+ }
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * dubs as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ */
+ static void foldl( D3 * __restrict__ const c, const D2 * __restrict__ const b ) {
+ if( static_cast< bool >( *c ) ) {
+ *c = static_cast< D3 >( *b );
+ }
+ }
+ };
+
+ /**
+ * Standard additive operator.
+ *
+ * Assumes native availability of + on the given data types or assumes that
+ * the relevant operators are properly overloaded.
+ *
+ * Assumes that the + operator is associative \em and commutative when
+ * assuming perfect arithmetic and equal data types for \a IN1, \a IN2, and
+ * \a OUT.
+ *
+ * Non-standard or non-matching data types or non-standard (overloaded) +
+ * operators, should therefore be used with caution and may necessitate an
+ * explicit definition as a GraphBLAS operator with the #is_associative and
+ * #is_commutative fields, and others, set as required.
+ *
+ * @tparam IN1 The left-hand input data type.
+ * @tparam IN2 The right-hand input data type.
+ * @tparam OUT The output data type.
+ */
+ // [Example Base Operator Implementation]
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class add {
+
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an inplace foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an inplace foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+ * Out-of-place application of the addition c = a + b.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * \warning Passing invalid pointers will result in UB.
+ */
+ static void apply( const left_type * __restrict__ const a,
+ const right_type * __restrict__ const b,
+ result_type * __restrict__ const c
+ ) {
+ ALP_UTIL_IGNORE_MAYBE_UNINITIALIZED // this is a (too) broad suppression--
+ // see internal issue 306 for rationale
+ *c = *a + *b;
+ ALP_UTIL_RESTORE_WARNINGS
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * dubs as the output memory area.
+ *
+ * \warning Passing invalid pointers will result in UB.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ *c += *a;
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * dubs as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ *
+ * \warning Passing invalid pointers will result in UB.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ *c += *b;
+ }
+ };
+ // [Example Base Operator Implementation]
+
+ /**
+ * Standard multiplicative operator.
+ *
+ * Assumes native availability * on the given data types, or assumes
+ * the relevant operators are properly overloaded.
+ *
+ * Assumes that the * operator is associative \em and commutative when
+ * assuming perfect arithmetic and equal data types for \a IN1, \a IN2, and
+ * \a OUT.
+ *
+ * Non-standard or non-matching data types or non-standard (overloaded) *
+ * operators, should therefore be used with caution and may necessitate an
+ * explicit definition as a GraphBLAS operator with the #is_associative and
+ * #is_commutative fields, and others, set as required.
+ *
+ * @tparam IN1 The left-hand input data type.
+ * @tparam IN2 The right-hand input data type.
+ * @tparam OUT The output data type.
+ */
+ template<
+ typename IN1, typename IN2, typename OUT,
+ enum Backend implementation = config::default_backend
+ >
+ class mul {
+
+ public:
+
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+ * Out-of-place application of the multiplication c = a * b.
+ *
+ * @param[in] a Pointer to the left-hand side input. Must be initialised.
+ * @param[in] b Pointer to the right-hand side input. Must be initialised.
+ * @param[out] c Pointer to where to compute the output.
+ *
+ * \warning All pointers must be valid or UB occurs.
+ */
+ static void apply(
+ const left_type * __restrict__ const a,
+ const right_type * __restrict__ const b,
+ result_type * __restrict__ const c
+ ) {
+ ALP_UTIL_IGNORE_MAYBE_UNINITIALIZED // this is a (too) broad suppression--
+ // see internal issue 306 for rationale
+ *c = *a * *b;
+ ALP_UTIL_RESTORE_WARNINGS
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * dubs as the output memory area.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ *c *= *a;
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * dubs as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ *c *= *b;
+ }
+ };
+
+ /**
+ * Standard max operator.
+ *
+ * Assumes native availability of < on the given data types, or assumes
+ * the relevant operators are properly overloaded.
+ *
+ * Non-standard or non-matching data types, or non-standard (overloaded) <
+ * operators, should be used with caution and may necessitate an explicit
+ * definition as a GraphBLAS operator with the #is_associative and
+ * #is_commutative fields, and others, set as required.
+ *
+ * @tparam IN1 The left-hand input data type.
+ * @tparam IN2 The right-hand input data type.
+ * @tparam OUT The output data type.
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class max {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+ * Out-of-place application of the max operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * At the end of the operation, \f$ c = \max\{a,b\} \f$.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ if( *a < *b ) {
+ *c = static_cast< OUT >( *b );
+ } else {
+ *c = static_cast< OUT >( *a );
+ }
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * dubs as the output memory area.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ if( *a > *c ) {
+ *c = *a;
+ }
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * dubs as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ if( *b > *c ) {
+ *c = *b;
+ }
+ }
+ };
+
+ /**
+ * Standard min operator.
+ *
+ * Assumes native availability of > on the given data types, or assumes
+ * the relevant operators are properly overloaded.
+ *
+ * Non-standard or non-matching data types, or non-standard (overloaded) >
+ * operators, should be used with caution and may necessitate an explicit
+ * definition as a GraphBLAS operator with the #is_associative and
+ * #is_commutative fields, and others, set as required.
+ *
+ * @tparam IN1 The left-hand input data type.
+ * @tparam IN2 The right-hand input data type.
+ * @tparam OUT The output data type.
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class min {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+ * Out-of-place application of the min operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * At the end of the operation, \f$ c = \min\{a,b\} \f$.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ if( *a > *b ) {
+ *c = static_cast< OUT >( *b );
+ } else {
+ *c = static_cast< OUT >( *a );
+ }
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * dubs as the output memory area.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ if( *a < *c ) {
+ *c = *a;
+ }
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * dubs as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ if( *b < *c ) {
+ *c = *b;
+ }
+ }
+ };
+
+ /**
+ * Numerical subtraction operator: computes \f$ c = a - b \f$.
+ *
+ * \note The class name retains a historical misspelling of \em subtract;
+ * it is kept as-is for backwards compatibility with existing callers.
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class substract {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = false;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = false;
+
+ /**
+ * Out-of-place application of this operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * At the end of the operation, \f$ c = a - b \f$.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ *c = *a - *b;
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * doubles as the output memory area.
+ *
+ * At the end of the operation, \f$ c = a - c \f$.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ *c = *a - *c;
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * doubles as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ *
+ * At the end of the operation, \f$ c = c - b \f$.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ *c -= *b;
+ }
+ };
+
+ /**
+ * Numerical division operator: computes \f$ c = a / b \f$.
+ *
+ * \note Division by zero is not guarded here; behaviour in that case follows
+ * the underlying arithmetic of the data types involved.
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class divide {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = false;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = false;
+
+ /**
+ * Out-of-place application of this operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * At the end of the operation, \f$ c = a/b \f$.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ *c = *a / *b;
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * doubles as the output memory area.
+ *
+ * At the end of the operation, \f$ c = a/c \f$.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ *c = *a / *c;
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * doubles as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ *
+ * At the end of the operation, \f$ c = c/b \f$.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ *c /= *b;
+ }
+ };
+
+ /**
+ * Reversed numerical division operator: computes \f$ c = b / a \f$,
+ * i.e., standard division with the operands swapped.
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class divide_reverse {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = false;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = false;
+
+ /**
+ * Out-of-place application of this operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * At the end of the operation, \f$ c = b/a \f$.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ *c = *b / *a;
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * doubles as the output memory area.
+ *
+ * At the end of the operation, \f$ c = c/a \f$.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ *c /= *a;
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * doubles as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ *
+ * At the end of the operation, \f$ c = b/c \f$.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ *c = *b / *c;
+ }
+ };
+
+ /**
+ * Equality comparison operator: computes \f$ c = (a = b) \f$, cast to the
+ * output type (i.e., \a c receives \a true when the operands compare equal,
+ * \a false otherwise).
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class equal {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+ * Out-of-place application of this operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * At the end of the operation, \f$ c = (a = b) \f$, cast to \a OUT.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ if( *a == *b ) {
+ *c = static_cast< OUT >( true );
+ } else {
+ *c = static_cast< OUT >( false );
+ }
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * doubles as the output memory area.
+ *
+ * \note On repeated folds, \a c holds the (cast) boolean result of the
+ * previous comparison, which is what subsequent comparisons see.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ if( *a == *c ) {
+ *c = static_cast< result_type >( true );
+ } else {
+ *c = static_cast< result_type >( false );
+ }
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * doubles as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ if( *b == *c ) {
+ *c = static_cast< result_type >( true );
+ } else {
+ *c = static_cast< result_type >( false );
+ }
+ }
+ };
+
+ /**
+ * Inequality comparison operator: computes \f$ c = (a \neq b) \f$, cast to
+ * the output type (i.e., \a c receives \a true when the operands differ,
+ * \a false otherwise).
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class not_equal {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+ * Out-of-place application of this operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * At the end of the operation, \f$ c = (a \neq b) \f$, cast to \a OUT.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ ALP_UTIL_IGNORE_MAYBE_UNINITIALIZED // this is a (too) broad suppression--
+ // see internal issue 306 for rationale
+ if( *a != *b ) {
+ *c = static_cast< OUT >( true );
+ } else {
+ *c = static_cast< OUT >( false );
+ }
+ ALP_UTIL_RESTORE_WARNINGS
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * doubles as the output memory area.
+ *
+ * \note On repeated folds, \a c holds the (cast) boolean result of the
+ * previous comparison, which is what subsequent comparisons see.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ if( *a != *c ) {
+ *c = static_cast< result_type >( true );
+ } else {
+ *c = static_cast< result_type >( false );
+ }
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * doubles as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ if( *b != *c ) {
+ *c = static_cast< result_type >( true );
+ } else {
+ *c = static_cast< result_type >( false );
+ }
+ }
+ };
+
+ /**
+ * Select-any operator: \a c receives \a a when \a a evaluates \a true,
+ * otherwise \a b when \a b evaluates \a true, and otherwise the (falsy)
+ * value of \a a. In other words, the first operand that evaluates \a true
+ * wins.
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class any_or {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+ * Out-of-place application of this operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * At the end of the operation, \a c equals \a a if \a a evaluates \a true,
+ * otherwise \a b if \a b evaluates \a true, and otherwise \a a (falsy).
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ if( *a ) {
+ *c = static_cast< OUT >( *a );
+ } else if( *b ) {
+ *c = static_cast< OUT >( *b );
+ } else {
+ assert( ! ( *a ) );
+ *c = static_cast< OUT >( *a );
+ }
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * doubles as the output memory area.
+ *
+ * \a c is overwritten with \a a only when \a a evaluates \a true.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ if( *a ) {
+ *c = static_cast< result_type >( *a );
+ }
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * doubles as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ *
+ * \a c is overwritten with \a b only when \a b evaluates \a true.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ if( *b ) {
+ *c = static_cast< result_type >( *b );
+ }
+ }
+ };
+
+ /**
+ * Logical disjunction operator: computes \f$ c = a \vee b \f$, cast to the
+ * output type.
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class logical_or {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+ * Out-of-place application of this operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * At the end of the operation, \f$ c = a \vee b \f$, cast to \a OUT.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ if( *a || *b ) {
+ *c = static_cast< OUT >( true );
+ } else {
+ *c = static_cast< OUT >( false );
+ }
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * doubles as the output memory area.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ if( *a || *c ) {
+ *c = static_cast< result_type >( true );
+ } else {
+ *c = static_cast< result_type >( false );
+ }
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * doubles as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ if( *b || *c ) {
+ *c = static_cast< result_type >( true );
+ } else {
+ *c = static_cast< result_type >( false );
+ }
+ }
+ };
+
+ /**
+ * Logical conjunction operator: computes \f$ c = a \wedge b \f$, cast to
+ * the output type.
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class logical_and {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+ * Out-of-place application of this operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * At the end of the operation, \f$ c = a \wedge b \f$, cast to \a OUT.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ if( *a && *b ) {
+ *c = static_cast< OUT >( true );
+ } else {
+ *c = static_cast< OUT >( false );
+ }
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * doubles as the output memory area.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ if( *a && *c ) {
+ *c = static_cast< result_type >( true );
+ } else {
+ *c = static_cast< result_type >( false );
+ }
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * doubles as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ if( *b && *c ) {
+ *c = static_cast< result_type >( true );
+ } else {
+ *c = static_cast< result_type >( false );
+ }
+ }
+ };
+
+ /**
+ * Absolute-difference operator: computes \f$ c = |a - b| \f$. The larger
+ * operand is always the minuend, so the subtraction never goes negative --
+ * this also makes the operator safe for unsigned types.
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class abs_diff {
+
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = false;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+ * Out-of-place application of this operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * At the end of the operation, \f$ c = |a - b| \f$.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ if( *a < *b ) {
+ *c = static_cast< OUT >( *b - *a );
+ } else {
+ *c = static_cast< OUT >( *a - *b );
+ }
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * doubles as the output memory area.
+ *
+ * At the end of the operation, \f$ c = |a - c| \f$.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ if( *a < *c ) {
+ *c -= *a;
+ } else {
+ *c = static_cast< OUT >( *a - *c );
+ }
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * doubles as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ *
+ * At the end of the operation, \f$ c = |c - b| \f$.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ if( *b < *c ) {
+ *c -= *b;
+ } else {
+ *c = static_cast< OUT >( *b - *c );
+ }
+ }
+ };
+
+ /**
+ * ReLU-style maximum operator: \a c receives the larger of the two
+ * operands (ties resolve to \a a; same value either way).
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class relu {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+
+ /**
+ * Whether this operator is \em mathematically associative; that is,
+ * associative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_associative = true;
+
+ /**
+ * Whether this operator is \em mathematically commutative; that is,
+ * commutative when assuming equivalent data types for \a IN1, \a IN2,
+ * and \a OUT, as well as assuming exact arithmetic, no overflows, etc.
+ */
+ static constexpr bool is_commutative = true;
+
+ /**
+ * Out-of-place application of this operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * At the end of the operation, \f$ c = ReLU\{a,b\} = \begin{cases}
+ * a \text{, if } a>b \\
+ * b \text{, otherwise}
+ * \end{cases}\f$.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ if( *a < *b ) {
+ *c = static_cast< OUT >( *b );
+ } else {
+ *c = static_cast< OUT >( *a );
+ }
+ }
+
+ /**
+ * In-place left-to-right folding.
+ *
+ * @param[in] a Pointer to the left-hand side input data.
+ * @param[in,out] c Pointer to the right-hand side input data. This also
+ * doubles as the output memory area.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ if( *a > *c ) {
+ *c = *a;
+ }
+ }
+
+ /**
+ * In-place right-to-left folding.
+ *
+ * @param[in,out] c Pointer to the left-hand side input data. This also
+ * doubles as the output memory area.
+ * @param[in] b Pointer to the right-hand side input data.
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ if( *b > *c ) {
+ *c = *b;
+ }
+ }
+ };
+
+ /**
+ * Squared-difference operator: computes \f$ c = (a - b)^2 \f$.
+ */
+ template< typename D1, typename D2, typename D3, enum Backend implementation = config::default_backend >
+ class square_diff {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef D1 left_type;
+ /** Alias to the right-hand input data type. */
+ typedef D2 right_type;
+ /** Alias to the output data type. */
+ typedef D3 result_type;
+
+ /** Whether this operator has an in-place foldl. */
+ static constexpr bool has_foldl = true;
+ /** Whether this operator has an in-place foldr. */
+ static constexpr bool has_foldr = true;
+ /** Squared difference is not associative. */
+ static constexpr bool is_associative = false;
+ /** Commutative, since \f$ (a-b)^2 = (b-a)^2 \f$. */
+ static constexpr bool is_commutative = true;
+
+ /**
+ * Out-of-place application: at the end of the operation,
+ * \f$ c = (a - b)^2 \f$.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ *c = ( *a - *b ) * ( *a - *b );
+ }
+
+ /**
+ * In-place left-to-right folding: \f$ c = (a - c)^2 \f$.
+ */
+ static void foldr( const left_type * __restrict__ const a, result_type * __restrict__ const c ) {
+ *c = ( *a - *c ) * ( *a - *c );
+ }
+
+ /**
+ * In-place right-to-left folding: \f$ c = (c - b)^2 \f$.
+ *
+ * \note The argument order (output first, then the right-hand input)
+ * now matches the foldl signature of every other operator in this
+ * file; the previous order ( b, c ) was inconsistent and would
+ * break generic callers that invoke OP::foldl( c, b ).
+ */
+ static void foldl( result_type * __restrict__ const c, const right_type * __restrict__ const b ) {
+ *c = ( *c - *b ) * ( *c - *b );
+ }
+ };
+
+ /**
+ * Pairing (zip) operator: combines a left operand of type \a IN1 and a
+ * right operand of type \a IN2 into a result of type
+ * std::pair< IN1, IN2 >.
+ *
+ * For use together with argmin.
+ */
+ template< typename IN1, typename IN2, enum Backend implementation = config::default_backend >
+ class zip {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+ /** The output is a pair of the two input values. */
+ typedef std::pair< IN1, IN2 > result_type;
+
+ /** No in-place foldl is provided (the output type differs from the inputs). */
+ static constexpr bool has_foldl = false;
+ /** No in-place foldr is provided (the output type differs from the inputs). */
+ static constexpr bool has_foldr = false;
+ /** Pairing is not associative. */
+ static constexpr bool is_associative = false;
+ /** Pairing is order-dependent, hence not commutative. */
+ static constexpr bool is_commutative = false;
+
+ /**
+ * Out-of-place application: at the end of the operation,
+ * \f$ c = (a, b) \f$.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ *c = std::make_pair( *a, *b );
+ }
+ };
+
+ /**
+ * Compares the \a first members of two pair-like operands; \a IN1 and
+ * \a IN2 are hence assumed to expose a \a first member (e.g., std::pair).
+ */
+ template< typename IN1, typename IN2, typename OUT, enum Backend implementation = config::default_backend >
+ class equal_first {
+ public:
+ /** Alias to the left-hand input data type. */
+ typedef IN1 left_type;
+
+ /** Alias to the right-hand input data type. */
+ typedef IN2 right_type;
+
+ /** Alias to the output data type. */
+ typedef OUT result_type;
+
+ /** No in-place foldl is provided. */
+ static constexpr bool has_foldl = false;
+ /** No in-place foldr is provided. */
+ static constexpr bool has_foldr = false;
+ /** This comparison is not associative. */
+ static constexpr bool is_associative = false;
+ /** This comparison is not commutative. */
+ static constexpr bool is_commutative = false;
+
+ /**
+ * Out-of-place application of this operator.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ * @param[out] c The output. Must be pre-allocated.
+ *
+ * At the end of the operation, \f$ c = a->first == b->first \f$.
+ */
+ static void apply( const left_type * __restrict__ const a, const right_type * __restrict__ const b, result_type * __restrict__ const c ) {
+ if( a->first == b->first ) {
+ *c = static_cast< OUT >( true );
+ } else {
+ *c = static_cast< OUT >( false );
+ }
+ }
+ };
+
+ /**
+ * This class takes a generic operator implementation and exposes a more
+ * convenient apply() function based on it. This function allows arbitrary
+ * data types being passed as parameters, and automatically handles any
+ * casting required for the raw operator.
+ *
+ * @tparam OP The generic operator implementation.
+ *
+ * @see Operator for full details.
+ */
+ template< typename OP, enum Backend implementation = config::default_backend >
+ class OperatorBase {
+
+ protected:
+ /** The block size that should be used during map-like operations. */
+ static constexpr size_t blocksize = alp::utils::static_min( alp::config::SIMD_BLOCKSIZE< typename OP::left_type >::value(),
+ alp::utils::static_min( alp::config::SIMD_BLOCKSIZE< typename OP::right_type >::value(), alp::config::SIMD_BLOCKSIZE< typename OP::result_type >::value() ) );
+
+ /** The left-hand side input domain. */
+ typedef typename OP::left_type D1;
+
+ /** The right-hand side input domain. */
+ typedef typename OP::right_type D2;
+
+ /** The output domain. */
+ typedef typename OP::result_type D3;
+
+ public:
+ /** @return Whether this operator is mathematically associative. */
+ static constexpr bool is_associative() {
+ return OP::is_associative;
+ }
+
+ /** @return Whether this operator is mathematically commutative. */
+ static constexpr bool is_commutative() {
+ return OP::is_commutative;
+ }
+
+ /**
+ * Straightforward application of this operator. Computes \f$ x \odot y \f$
+ * and stores the result in \a z.
+ *
+ * @tparam InputType1 The type of the input parameter \a x.
+ * @tparam InputType2 The type of the input parameter \a y.
+ * @tparam OutputType The type of the output parameter \a z.
+ *
+ * \warning If \a InputType1 does not match \a D1 \em or \a InputType2 does
+ * not match \a D2 \em or \a OutputType does not match \a D3, then
+ * the input will be cast into temporary variables of the correct
+ * types, while the output will be cast from a temporary variable.
+ *
+ * \note Best performance is thus only guaranteed when all domains match.
+ *
+ * @param[in] x The left-hand side input.
+ * @param[in] y The right-hand side input.
+ * @param[out] z The output element.
+ */
+ template< typename InputType1, typename InputType2, typename OutputType >
+ static void apply( const InputType1 & x, const InputType2 & y, OutputType & z ) {
+ const D1 a = static_cast< D1 >( x );
+ const D2 b = static_cast< D2 >( y );
+ D3 temp;
+ OP::apply( &a, &b, &temp );
+ z = static_cast< OutputType >( temp );
+ }
+
+ /**
+ * This is the high-performance version of apply() in the sense that no
+ * casting is required. This version will be automatically called whenever
+ * possible.
+ */
+ static void apply( const D1 & x, const D2 & y, D3 & out ) {
+ OP::apply( &x, &y, &out );
+ }
+ };
+
+ /**
+ * A class capable of adding an out-of-place \a foldr function for an
+ * operator that is not fold-right capable, or capable of adding an in-
+ * place foldr function for an operator that is fold-right capable. For
+ * fold-right capable operators, this class is also capable of adding
+ * an efficient eWiseApply function.
+ *
+ * An operator is fold-right capable when the Base Operator \a OP
+ * provides an in-place foldr implementation, \em and whenever \a D1
+ * equals \a D3. If one of either requirements is not met, then \a OP
+ * is not fold-right capable and this class is selected to add an out-
+ * of-place foldr function.
+ *
+ * @tparam OP The generic operator implementation.
+ * @tparam guard This typename is void if and only if \a OP is not fold-
+ * right capable. In this case, this class adds an out-of-
+ * place foldr implementation to the operator.
+ * If it is not void, then this class defines an
+ * in-place foldr implementation instead.
+ *
+ * \note This specific class corresponds to the \a guard variable equal to
+ * \a void.
+ *
+ * @see Operator for full details.
+ * @see OperatorBase for additional functions exposed to the final operator.
+ */
+ template< typename OP, typename guard = void, enum Backend implementation = config::default_backend >
+ class OperatorFR : public OperatorBase< OP > {
+
+ public:
+ /**
+ * Emulated in-place application of this operator on two data elements.
+ *
+ * Computes \f$ x \odot y \f$ and writes the result into \f$ y \f$.
+ *
+ * We wish to call this in-place variant internally for brevity. However,
+ * if \a OP has no in-place variant, then we must cache the previous
+ * value of the output element or otherwise we will breach the
+ * __restrict__ contract of OP::apply.
+ * The caller must ensure the appropriate domains and casting behaviour
+ * is applicable. Note that a user is never to call these functions
+ * explicitly.
+ *
+ * @tparam InputType The type of the parameter \a x.
+ * @tparam IOType The type of the parameter \a y.
+ *
+ * \warning Additional casting and use of temporary variables may occur
+ * when \a InputType does not match \a D1 \em or \a IOType
+ * does not match \a D3.
+ *
+ * \note This implementation relies on apply().
+ *
+ * @param[in] x The value that is to be applied to \a y.
+ * @param[in,out] y The value \a x is to be applied against.
+ */
+ template< typename InputType, typename IOType >
+ static void foldr( const InputType & x, IOType & y ) {
+ typedef typename OperatorBase< OP >::D2 D2;
+ const D2 cache = static_cast< D2 >( y );
+ OperatorBase< OP >::apply( x, cache, y );
+ }
+
+ /**
+ * Out-of-place element-wise foldr function. Calculates
+ * \f$\forall\ i \in \{ 0, 1, \ldots, n - 1 \}, \f$
+ * \f$ x_i \odot z_i \f$ and stores the result into
+ * \f$ z_i \f$.
+ *
+ * @tparam InputType The type of elements in \a x.
+ * @tparam IOType The type of elements in \a z.
+ *
+ * @param x The left-hand side input data.
+ * @param z Where \a x shall be mapped into.
+ * @param n How many data elements \a x and \a z contain.
+ *
+ * This version requires three buffers, streams \a x once,
+ * and streams \a z twice (once for reading, once for
+		 * writing).
+ */
+ template< typename InputType, typename IOType >
+ static void eWiseFoldrAA( const InputType * __restrict__ const x, IOType * __restrict__ const z, const size_t n ) {
+ // local buffers
+ typedef typename OperatorBase< OP >::D1 D1;
+ typedef typename OperatorBase< OP >::D2 D2;
+ typedef typename OperatorBase< OP >::D3 D3;
+ D1 left_buffer[ OperatorBase< OP >::blocksize ];
+ D2 right_buffer[ OperatorBase< OP >::blocksize ];
+ D3 result_buffer[ OperatorBase< OP >::blocksize ];
+
+ // blockwise application
+ size_t i = 0;
+ while( i + OperatorBase< OP >::blocksize <= n ) {
+ // load into buffers
+ for( size_t b = 0; b < OperatorBase< OP >::blocksize; ++i, ++b ) {
+ left_buffer[ b ] = static_cast< D1 >( x[ i ] );
+ right_buffer[ b ] = static_cast< D2 >( z[ i ] );
+ }
+
+ // rewind source and output
+ i -= OperatorBase< OP >::blocksize;
+
+ // operate within buffer
+ for( size_t b = 0; b < OperatorBase< OP >::blocksize; ++b ) {
+ OP::apply( &( left_buffer[ b ] ), &( right_buffer[ b ] ), &( result_buffer[ b ] ) );
+ }
+
+ // write back result
+ for( size_t b = 0; b < OperatorBase< OP >::blocksize; ++i, ++b ) {
+ z[ i ] = static_cast< IOType >( result_buffer[ b ] );
+ }
+ }
+
+ // direct application for remainder
+ for( ; i < n; ++i ) {
+ left_buffer[ 0 ] = static_cast< D1 >( x[ i ] );
+ right_buffer[ 0 ] = static_cast< D2 >( z[ i ] );
+ OP::apply( left_buffer, right_buffer, result_buffer );
+ z[ i ] = static_cast< IOType >( result_buffer[ 0 ] );
+ }
+ }
+
+ /**
+ * Out-of-place element-wise foldr function. Calculates
+ * \f$ \forall\ i \in \{ 0, 1, \ldots, n - 1 \}, \f$
+ * \f$ x \odot z_i \f$ and stores the result into
+ * \f$ z_i \f$.
+ *
+ * @tparam InputType The type of elements in \a x.
+ * @tparam IOType The type of elements in \a z.
+ *
+ * @param x The left-hand side input value.
+ * @param z Where \a x shall be mapped into.
+ * @param n How many data elements \a z contains.
+ *
+ * This version requires two buffers and streams \a z
+ * twice (once for reading, once for writing).
+ */
+ template< typename InputType, typename IOType >
+ static void eWiseFoldrSA( const InputType x, IOType * __restrict__ const z, const size_t n ) {
+ // local buffers
+ typedef typename OperatorBase< OP >::D1 D1;
+ typedef typename OperatorBase< OP >::D2 D2;
+ typedef typename OperatorBase< OP >::D3 D3;
+ const D1 left_buffer = x; // this is actually mandatory in case x is a temporary
+ D2 right_buffer[ OperatorBase< OP >::blocksize ];
+ D3 result_buffer[ OperatorBase< OP >::blocksize ];
+
+ // blockwise application
+ size_t i = 0;
+ while( i + OperatorBase< OP >::blocksize <= n ) {
+ // load into buffers
+ for( size_t b = 0; b < OperatorBase< OP >::blocksize; ++i, ++b ) {
+ right_buffer[ b ] = static_cast< D2 >( z[ i ] );
+ }
+
+ // rewind source and output
+ i -= OperatorBase< OP >::blocksize;
+
+ // operate within buffer
+ for( size_t b = 0; b < OperatorBase< OP >::blocksize; ++b ) {
+ OP::apply( &left_buffer, &( right_buffer[ b ] ), &( result_buffer[ b ] ) );
+ }
+
+ // write back result
+ for( size_t b = 0; b < OperatorBase< OP >::blocksize; ++i, ++b ) {
+ z[ i ] = static_cast< IOType >( result_buffer[ b ] );
+ }
+ }
+
+ // direct application for remainder
+ for( ; i < n; ++i ) {
+ right_buffer[ 0 ] = static_cast< D2 >( z[ i ] );
+ OP::apply( &left_buffer, right_buffer, result_buffer );
+ z[ i ] = static_cast< IOType >( result_buffer[ 0 ] );
+ }
+ }
+ };
+
+ /**
+ * This class provides an in-place foldr implementation for Base Operators
+ * that are fold-right capable given its provided domains. It also implements
+ * an eWiseApply function that requires two buffers by exploiting the
+ * in-place foldr operator. Without an in-place foldr, it is still possible
+ * to implement an eWiseApply using two buffers if there is an in-place foldl
+ * added via OperatorFL. If that also fails, the eWiseApply function will be
+ * implemented using three buffers via OperatorNoFRFL.
+ *
+ * @tparam OP The generic operator implementation.
+ *
+ * @see Operator for full details.
+ * @see OperatorFR for details on fold-right capable operators and behaviour
+ * for non fold-right capable operators.
+ * @see OperatorBase for additional functions exposed to the final operator.
+ */
+ template< typename OP >
+ class OperatorFR< OP, typename std::enable_if< OP::has_foldr && std::is_same< typename OP::right_type, typename OP::result_type >::value >::type > : public OperatorBase< OP > {
+
+ private:
+ typedef typename OperatorBase< OP >::D1 D1;
+ typedef typename OperatorBase< OP >::D3 D3;
+ static constexpr size_t blocksize = OperatorBase< OP >::blocksize;
+
+ public:
+ /**
+ * In-place application of this operator on two data elements.
+ *
+ * Computes \f$ x \odot y \f$ and writes the result into \f$ y \f$.
+ *
+ * \note This variant is only called when the underlying raw operator
+ * supports in-place operations.
+ *
+ * The caller must ensure the appropriate domains and casting behaviour
+ * is applicable. Note that a user is never to call these functions
+ * explicitly.
+ *
+ * @param[in] x The value that is to be applied to \a y.
+ * @param[in,out] y The value \a x is to be applied against.
+ */
+ static void foldr( const D1 & x, D3 & y ) {
+ OP::foldr( &x, &y );
+ }
+
+ /**
+ * In-place element-wise foldr function. Calculates
+ * \f$\forall\ i \in \{ 0, 1, \ldots, n - 1 \}, \f$
+ * \f$ x \odot z_i \f$ and stores the result into \f$ z_i \f$.
+ *
+ * @tparam InputType The type of \a x.
+ * @tparam IOType The type of elements in \a z.
+ *
+ * @param[in] x The left-hand side input value.
+ * @param[in,out] z Where \a x shall be mapped into.
+ * @param[in] n How many data elements \a z contains.
+ *
+		 * This implementation requires one buffer only. It streams \a z twice,
+ * once for reading, once for writing. This function should vectorise.
+ */
+ template< typename InputType, typename IOType >
+ static void eWiseFoldrSA( const InputType x, IOType * __restrict__ const z, const size_t n ) {
+ // local buffers
+ const D1 left_buffer = static_cast< D1 >( x );
+ D3 result_buffer[ blocksize ];
+
+ // blockwise application
+ size_t i = 0;
+ while( i + blocksize <= n ) {
+ // load into buffers
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ result_buffer[ b ] = static_cast< D3 >( z[ i ] );
+ }
+
+ // rewind source and output
+ i -= blocksize;
+
+ // operate within buffer
+ for( size_t b = 0; b < blocksize; ++b ) {
+ OP::foldr( &left_buffer, &( result_buffer[ b ] ) );
+ }
+
+ // write back result
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ z[ i ] = static_cast< IOType >( result_buffer[ b ] );
+ }
+ }
+
+ // direct application for remainder
+ for( ; i < n; ++i ) {
+ result_buffer[ 0 ] = static_cast< D3 >( z[ i ] );
+ OP::foldr( &left_buffer, result_buffer );
+ z[ i ] = static_cast< IOType >( result_buffer[ 0 ] );
+ }
+ }
+
+ /**
+ * In-place element-wise foldr function. Calculates
+ * \f$\forall\ i \in \{ 0, 1, \ldots, n - 1 \}, \f$
+ * \f$ x_i \odot z_i \f$ and stores the result into \f$ z_i \f$.
+ *
+ * @tparam InputType The type of elements in \a x.
+ * @tparam IOType The type of elements in \a z.
+ *
+ * @param[in] x The left-hand side input data.
+ * @param[in,out] z Where \a x shall be mapped into.
+ * @param[in] n How many data elements \a x and \a z contain.
+ *
+ * This implementation requires two buffers only. It streams \a x once,
+ * while streaming \a z twice (once for reading, once for writing). This
+ * function should vectorise.
+ */
+ template< typename InputType, typename IOType >
+ static void eWiseFoldrAA( const InputType * __restrict__ const x, IOType * __restrict__ const z, const size_t n ) {
+ // local buffers
+ D1 left_buffer[ blocksize ];
+ D3 result_buffer[ blocksize ];
+
+ // blockwise application
+ size_t i = 0;
+ while( i + blocksize <= n ) {
+ // load into buffers
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ left_buffer[ b ] = static_cast< D1 >( x[ i ] );
+ result_buffer[ b ] = static_cast< D3 >( z[ i ] );
+ }
+
+ // rewind source and output
+ i -= blocksize;
+
+ // operate within buffer
+ for( size_t b = 0; b < blocksize; ++b ) {
+ OP::foldr( &( left_buffer[ b ] ), &( result_buffer[ b ] ) );
+ }
+
+ // write back result
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ z[ i ] = static_cast< IOType >( result_buffer[ b ] );
+ }
+ }
+
+ // direct application for remainder
+ for( ; i < n; ++i ) {
+ left_buffer[ 0 ] = static_cast< D1 >( x[ i ] );
+ result_buffer[ 0 ] = static_cast< D3 >( z[ i ] );
+ OP::foldr( left_buffer, result_buffer );
+ z[ i ] = static_cast< IOType >( result_buffer[ 0 ] );
+ }
+ }
+
+ /**
+ * In-place element-wise apply function. Calculates
+ * \f$\forall\ i \in \{ 0, 1, \ldots, n - 1 \}, \f$
+ * \f$ z_i = x_i \odot y_i \f$.
+ *
+ * @tparam InputType1 The type of elements in \a x.
+ * @tparam InputType2 The type of elements in \a y.
+ * @tparam OutputType The type of elements in \a z.
+ *
+ * If \a InputType2 and \a D3 are not the same, then the existing data in
+ * \a y is cast to \a D3 prior to application of this in-place operator.
+ * If \a InputType1 and \a D1 are not the same, then the existing data in
+ * \a x are cast to \a D1 prior to application of this in-place operator.
+ * If \a OutputType and \a D3 are not the same, then the results of
+ * applying this operator are cast to \a OutputType prior to writing back
+ * the results.
+ *
+ * \warning The first casting behaviour may not be what you want. The two
+ * other casting behaviours are allowed by the GraphBLAS unless
+ * the alp::descriptor::no_casting is given.
+ *
+ * \note By default, this GraphBLAS implementation will only use this
+ * code when \a D2 matches \a D3 and OP::has_foldr is \a true.
+ *
+ * This implementation relies on an in-place foldr().
+ *
+ * @param[in] x The left-hand side input data. The memory range starting
+ * at \a x and ending at \a x + n (exclusive) may not
+ * overlap with the memory area starting at \a z and ending
+ * at \a z + n (exclusive).
+ * @param[in] y The right-hand side input data. The memory range starting
+ * at \a y and ending at \a y + n (exclusive) may not
+ * overlap with the memory area starting at \a z and ending
+ * at \a z + n.
+ * @param[out] z Where the map of \a x into \a y must be stored. This
+ * pointer is restricted in the sense that its memory may
+		 *               never overlap with those pointed to by \a x or \a y, as
+ * detailed above.
+ * @param[in] n How many data elements \a x, \a y, and \a z contain.
+ */
+ template< typename InputType1, typename InputType2, typename OutputType >
+ static void eWiseApply( const InputType1 * x, const InputType2 * y, OutputType * __restrict__ z, const size_t n ) {
+#ifdef _DEBUG
+#ifdef D_ALP_NO_STDIO
+ std::cout << "In OperatorFR::eWiseApply\n";
+#endif
+#endif
+ // NOTE: this variant is only active when the computation can be done using two buffers only
+
+ // local buffers
+ D1 left_buffer[ blocksize ];
+ D3 result_buffer[ blocksize ];
+
+ // blockwise application
+ size_t i = 0;
+ while( i + blocksize <= n ) {
+
+ // load into buffers
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ left_buffer[ b ] = static_cast< D1 >( x[ i ] );
+ result_buffer[ b ] = static_cast< D3 >( y[ i ] );
+ }
+
+ // rewind source and output
+ i -= blocksize;
+
+ // operate within buffer
+ for( size_t b = 0; b < blocksize; ++b ) {
+ OP::foldr( &( left_buffer[ b ] ), &( result_buffer[ b ] ) );
+ }
+
+ // write back result
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ z[ i ] = static_cast< OutputType >( result_buffer[ b ] );
+ }
+ }
+
+ // direct application for remainder
+ for( ; i < n; ++i ) {
+ left_buffer[ 0 ] = static_cast< typename OP::left_type >( x[ i ] );
+ result_buffer[ 0 ] = static_cast< typename OP::result_type >( y[ i ] );
+ OP::foldr( left_buffer, result_buffer );
+ z[ i ] = static_cast< OutputType >( result_buffer[ 0 ] );
+ }
+ }
+ };
+
+ /**
+ * A class capable of adding an out-of-place \a foldl function for an
+ * operator that is not fold-left capable, or capable of adding an in-
+ * place foldl function for an operator that is fold-left capable.
+ *
+ * An operator is fold-left capable when the Base Operator \a OP provides
+	 * an in-place foldl implementation, \em and whenever \a D1 equals \a D3.
+ * If one of either requirements is not met, then \a OP is not fold-left
+ * capable and this class is selected to add an out-of-place foldl function.
+ *
+ * @tparam OP The generic operator implementation.
+ * @tparam guard This typename is void if and only if \a OP is not fold-
+ * left capable. In this case, this class adds an
+ * out-of-place foldl implementation to the operator.
+ * If \a guard is not void, then this class defines an
+	 *               in-place foldl implementation instead.
+ *
+ * \note This specific class corresponds to the \a guard variable equal to
+ * \a void.
+ *
+ * @see Operator for full details.
+ * @see OperatorFR for additional functions exposed to the resulting
+ * operator.
+ * @see OperatorBase for additional functions exposed to the resulting
+ * operator.
+ */
+ template< typename OP, typename guard = void, enum Backend implementation = config::default_backend >
+ class OperatorFL : public OperatorFR< OP > {
+
+ private:
+ public:
+ typedef typename OperatorBase< OP >::D1 D1;
+ typedef typename OperatorBase< OP >::D2 D2;
+ typedef typename OperatorBase< OP >::D3 D3;
+ static constexpr size_t blocksize = OperatorBase< OP >::blocksize;
+
+ /**
+ * Emulated in-place application of this operator on two data elements.
+ *
+ * Computes \f$ x \odot y \f$ and writes the result into \f$ x \f$.
+ *
+ * We wish to call this in-place variant internally for brevity. However,
+ * if \a OP has no in-place variant, then we must cache the previous
+ * value of the output element or otherwise we will breach the
+ * __restrict__ contract of OP::apply.
+ * The caller must ensure the appropriate domains and casting behaviour
+ * is applicable. Note that a user is never to call these functions
+ * explicitly.
+ *
+ * @tparam InputType The type of the parameter \a x.
+ * @tparam IOType The type of the parameter \a y.
+ *
+ * \warning Additional casting and use of temporary variables may occur
+ * when \a InputType does not match \a D2 \em or \a IOType
+ * does not match \a D3.
+ *
+ * \note This implementation relies on apply().
+ *
+ * @param[in,out] x The value \a y is to be applied against.
+ * @param[in] y The value that is to be applied to \a x.
+ */
+ template< typename InputType, typename IOType >
+ static void foldl( IOType & x, const InputType & y ) {
+ const D1 cache = static_cast< D1 >( x );
+ OperatorBase< OP >::apply( cache, y, x );
+ }
+
+ /**
+ * Out-of-place element-wise foldl function. Calculates
+ * \f$\forall\ i \in \{ 0, 1, \ldots, n - 1 \}, \f$
+ * \f$ x_i \odot y \f$ and stores the result into \f$ x_i \f$.
+ *
+ * @tparam IOType The type of elements in \a x.
+ * @tparam InputType The type of \a y.
+ *
+ * @param[in, out] x At function entry, the left-hand side input data.
+ * At function exit, the output data as defined above.
+ * @param[in] y The right-hand side input value.
+ * @param[in] n How many data elements \a x contains.
+ *
+ * This version requires two buffers and streams \a x twice (once for
+ * reading, once for writing). This function should vectorise its
+ * out-of-place operations.
+ */
+ template< typename IOType, typename InputType >
+ static void eWiseFoldlAS( IOType * __restrict__ const x, const InputType y, const size_t n ) {
+ // local buffers
+ D1 left_buffer[ blocksize ];
+ const D2 right_buffer = y;
+ D3 result_buffer[ blocksize ];
+
+ // blockwise application
+ size_t i = 0;
+ while( i + blocksize <= n ) {
+ // load into buffers
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ left_buffer[ b ] = static_cast< D1 >( x[ i ] );
+ }
+
+ // rewind source and output
+ i -= blocksize;
+
+ // operate within buffer
+ for( size_t b = 0; b < blocksize; ++b ) {
+ OP::apply( &( left_buffer[ b ] ), &right_buffer, &( result_buffer[ b ] ) );
+ }
+
+ // write back result
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ x[ i ] = static_cast< IOType >( result_buffer[ b ] );
+ }
+ }
+
+ // direct application for remainder
+ for( ; i < n; ++i ) {
+ left_buffer[ 0 ] = static_cast< D1 >( x[ i ] );
+ OP::apply( left_buffer, &right_buffer, result_buffer );
+ x[ i ] = static_cast< IOType >( result_buffer[ 0 ] );
+ }
+ }
+
+ /**
+ * Out-of-place element-wise foldl function. Calculates
+ * \f$\forall\ i \in \{ 0, 1, \ldots, n - 1 \}, \f$
+ * \f$ x_i \odot y_i \f$ and stores the result into \f$ x_i \f$.
+ *
+ * @tparam IOType The type of elements in \a x.
+ * @tparam InputType The type of elements in \a y.
+ *
+ * @param[in, out] x At function entry, the left-hand side input data.
+ * At function exit, the output data as defined above.
+ * @param[in] y The right-hand side input.
+ * @param[in] n How many data elements \a x and \a y contain.
+ *
+ * This version requires three buffers, streams \a y once, and streams
+ * \a x twice (once for reading, once for writing). This function should
+ * vectorise its out-of-place operations.
+ */
+ template< typename IOType, typename InputType >
+ static void eWiseFoldlAA( IOType * __restrict__ const x, const InputType * __restrict__ const y, const size_t n ) {
+ // local buffers
+ D1 left_buffer[ blocksize ];
+ D2 right_buffer[ blocksize ];
+ D3 result_buffer[ blocksize ];
+
+ // blockwise application
+ size_t i = 0;
+ while( i + blocksize <= n ) {
+ // load into buffers
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ left_buffer[ b ] = static_cast< D1 >( x[ i ] );
+ right_buffer[ b ] = static_cast< D2 >( y[ i ] );
+ }
+
+ // rewind source and output
+ i -= blocksize;
+
+ // operate within buffer
+ for( size_t b = 0; b < blocksize; ++b ) {
+ OP::apply( &( left_buffer[ b ] ), &( right_buffer[ b ] ), &( result_buffer[ b ] ) );
+ }
+
+ // write back result
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ x[ i ] = static_cast< IOType >( result_buffer[ b ] );
+ }
+ }
+
+ // direct application for remainder
+ for( ; i < n; ++i ) {
+ left_buffer[ 0 ] = static_cast< D1 >( x[ i ] );
+ right_buffer[ 0 ] = static_cast< D2 >( y[ i ] );
+ OP::apply( left_buffer, right_buffer, result_buffer );
+ x[ i ] = static_cast< IOType >( result_buffer[ 0 ] );
+ }
+ }
+ };
+
+ /**
+ * This class provides an in-place foldl implementation for Base Operators
+ * that are fold-left capable given its provided domains.
+ *
+ * @tparam OP The generic operator implementation.
+ *
+ * @see Operator for full details.
+	 * @see OperatorFL for details on fold-left capable operators and behaviour
+	 *                 for non fold-left capable operators.
+ * @see OperatorFR for additional functions exposed to the resulting
+ * operator.
+ * @see OperatorBase for additional functions exposed to the resulting
+ * operator.
+ */
+ template< typename OP >
+ class OperatorFL< OP, typename std::enable_if< OP::has_foldl && std::is_same< typename OP::left_type, typename OP::result_type >::value >::type > : public OperatorFR< OP > {
+
+ private:
+ public:
+ typedef typename OperatorBase< OP >::D2 D2;
+ typedef typename OperatorBase< OP >::D3 D3;
+ static constexpr size_t blocksize = OperatorBase< OP >::blocksize;
+
+ /**
+ * In-place application of this operator on two data elements.
+ *
+ * Computes \f$ x \odot y \f$ and writes the result into \f$ x \f$.
+ *
+ * \note This variant is only called when the underlying raw operator
+ * supports in-place operations.
+ *
+ * The caller must ensure the appropriate domains and casting behaviour
+ * is applicable. Note that a user is never to call these functions
+ * explicitly.
+ *
+ * @param[in,out] x The value \a y is to be applied against.
+ * @param[in] y The value that is to be applied to \a x.
+ */
+ static void foldl( D3 & x, const D2 & y ) {
+ OP::foldl( &x, &y );
+ }
+
+ /**
+ * In-place element-wise foldl function. Calculates
+ * \f$\forall\ i \in \{ 0, 1, \ldots, n - 1 \}, \f$
+ * \f$ x_i \odot y_i \f$ and stores the result into \f$ x_i \f$.
+ *
+ * @tparam IOType The type of elements in \a x.
+ * @tparam InputType The type of elements in \a y.
+ *
+		 * @param[in,out] x At function entry: the left-hand side input data.
+ * At function exit: the result data.
+ * @param[in] y The right-hand side input data.
+ * @param[in] n How many data elements \a x and \a y contain.
+ *
+ * This implementation requires two buffers only. It streams \a y once,
+ * while streaming \a x twice (once for reading, once for writing). This
+ * function should vectorise.
+ */
+ template< typename InputType, typename IOType >
+ static void eWiseFoldlAA( IOType * __restrict__ const x, const InputType * __restrict__ const y, const size_t n ) {
+ // local buffers
+ D2 right_buffer[ blocksize ];
+ D3 result_buffer[ blocksize ];
+
+ // blockwise application
+ size_t i = 0;
+ while( i + blocksize <= n ) {
+ // load into buffers
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ right_buffer[ b ] = static_cast< D2 >( y[ i ] );
+ result_buffer[ b ] = static_cast< D3 >( x[ i ] );
+ }
+
+ // rewind source and output
+ i -= blocksize;
+
+ // operate within buffer
+ for( size_t b = 0; b < blocksize; ++b ) {
+ OP::foldl( &( result_buffer[ b ] ), &( right_buffer[ b ] ) );
+ }
+
+ // write back result
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ x[ i ] = static_cast< IOType >( result_buffer[ b ] );
+ }
+ }
+
+ // direct application for remainder
+ for( ; i < n; ++i ) {
+ right_buffer[ 0 ] = static_cast< D2 >( y[ i ] );
+ result_buffer[ 0 ] = static_cast< D3 >( x[ i ] );
+ OP::foldl( result_buffer, right_buffer );
+ x[ i ] = static_cast< IOType >( result_buffer[ 0 ] );
+ }
+ }
+
+ /**
+ * In-place element-wise foldl function. Calculates
+ * \f$ \forall\ i \in \{ 0, 1, \ldots, n - 1 \}, \f$
+ * \f$ x_i \odot y \f$ and stores the result into \f$ x_i \f$.
+ *
+ * @tparam IOType The type of elements in \a x.
+ * @tparam InputType The type of \a y.
+ *
+		 * @param[in,out] x At function entry: the left-hand side input data.
+ * At function exit: the result data.
+ * @param[in] y The right-hand side input value.
+ * @param[in] n How many data elements \a x contains.
+ *
+		 * This implementation requires one buffer only. It streams \a x twice
+ * (once for reading, once for writing). This function should vectorise.
+ */
+ template< typename InputType, typename IOType >
+ static void eWiseFoldlAS( IOType * __restrict__ const x, const InputType y, const size_t n ) {
+ // local buffers
+ const D2 right_buffer = static_cast< D2 >( y );
+ D3 result_buffer[ blocksize ];
+
+ // blockwise application
+ size_t i = 0;
+ while( i + blocksize <= n ) {
+ // load into buffers
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ result_buffer[ b ] = static_cast< D3 >( x[ i ] );
+ }
+
+ // rewind source and output
+ i -= blocksize;
+
+ // operate within buffer
+ for( size_t b = 0; b < blocksize; ++b ) {
+ OP::foldl( &( result_buffer[ b ] ), &right_buffer );
+ }
+
+ // write back result
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ x[ i ] = static_cast< IOType >( result_buffer[ b ] );
+ }
+ }
+
+ // direct application for remainder
+ for( ; i < n; ++i ) {
+ result_buffer[ 0 ] = static_cast< D3 >( x[ i ] );
+ OP::foldl( result_buffer, &right_buffer );
+ x[ i ] = static_cast< IOType >( result_buffer[ 0 ] );
+ }
+ }
+ };
+
+ /**
+ * A class capable of adding an in-place \a eWiseApply function for an
+ * operator that is fold-left capable but not fold-right capable.
+ *
+	 * Like OperatorFR on a fold-right capable operator, this class is
+ * capable of providing an eWiseApply function that requires only two
+ * internal buffers by making use of the in-place foldl.
+ *
+ * @tparam OP The generic operator implementation.
+	 * @tparam guard This typename is void if and only if \a OP is \em not
+	 *               fold-left capable, or is also fold-right capable. In this
+	 *               case, this class adds nothing to the resulting operator.
+ * If \a guard is not void, however, then this class adds an
+ * in-place eWiseApply implementation to this operator
+ * instead.
+ *
+ * @see Operator for full details.
+ * @see OperatorFL for additional functions exposed to the resulting
+ * operator.
+ * @see OperatorFR for additional functions exposed to the resulting
+ * operator and an alternative way of providing a more
+ * efficient eWiseApply.
+ * @see OperatorBase for additional functions exposed to the resulting
+ * operator.
+ */
+ template< typename OP, typename guard = void, enum Backend implementation = config::default_backend >
+ class OperatorNoFR : public OperatorFL< OP > {};
+
+ /**
+ * This class provides an in-place eWiseApply implementation for Base
+ * Operators that are fold-left capable given its provided domains, but not
+ * fold-right capable. This implementation uses two internal buffers and
+ * relies on an in-place foldl. If this were not possible, then the
+ * eWiseApply will be provided by OperatorNoFRFL in an implementation that
+ * requires three buffers and out-of-place operations instead.
+ *
+ * @tparam OP The generic operator implementation.
+ *
+ * @see Operator for full details.
+ * @see OperatorFL for additional functions exposed to the resulting
+ * operator.
+ * @see OperatorFR for additional functions exposed to the resulting
+ * operator.
+ * @see OperatorBase for additional functions exposed to the resulting
+ * operator.
+ */
+ template< typename OP >
+ class OperatorNoFR< OP, typename std::enable_if< OP::has_foldl && ! ( OP::has_foldr ) && std::is_same< typename OP::left_type, typename OP::result_type >::value >::type > :
+ public OperatorFL< OP > {
+
+ private:
+ public:
+ typedef typename OperatorBase< OP >::D2 D2;
+ typedef typename OperatorBase< OP >::D3 D3;
+ static constexpr size_t blocksize = OperatorBase< OP >::blocksize;
+
+ /**
+ * In-place element-wise apply function. Calculates
+ * \f$\forall\ i \in \{ 0, 1, \ldots, n - 1 \}, \f$
+ * \f$ z_i = x_i \odot y_i \f$.
+ *
+ * @tparam InputType1 The type of elements in \a x.
+ * @tparam InputType2 The type of elements in \a y.
+ * @tparam OutputType The type of elements in \a z.
+ *
+ * If the \a InputType1 and \a D3 are not the same, then the existing data
+ * in \a x is cast to \a D3 prior to application of this operator.
+ * If \a InputType2 and \a D2 are not the same, then the existing data in
+ * \a y is cast to \a D2 prior to application of this operator.
+ * If \a OutputType and \a D3 are not the same, then the result of
+ * applications of this operator are cast to \a OutputType prior to
+ * writing it back to \a z.
+ *
+ * \warning The first casting behaviour may not be what you want. The two
+ * other casting behaviours are allowed by the GraphBLAS unless
+ * the alp::descriptor::no_casting is given.
+ *
+ * \note By default, this GraphBLAS implementation will only use this
+		 *       code when \a D1 matches \a D3 and OP::has_foldl is \a true.
+		 *       However, this implementation will never be enabled if
+		 *       OP::has_foldr is \a true.
+ *
+ * This implementation relies on an in-place foldl().
+ *
+ * @param[in] x The left-hand side input data. The memory range starting
+ * at \a x and ending at \a x + n (exclusive) may not
+ * overlap with the memory area starting at \a z and ending
+ * at \a z + n (exclusive).
+ * @param[in] y The right-hand side input data. The memory range starting
+ * at \a y and ending at \a y + n (exclusive) may not
+ * overlap with the memory area starting at \a z and ending
+ * at \a z + n.
+ * @param[out] z Where the map of \a x into \a y must be stored. This
+ * pointer is restricted in the sense that its memory may
+		 *               never overlap with those pointed to by \a x or \a y, as
+ * detailed above.
+ * @param[in] n How many data elements \a x, \a y, and \a z contain.
+ */
+ template< typename InputType1, typename InputType2, typename OutputType >
+ static void eWiseApply( const InputType1 * x, const InputType2 * y, OutputType * __restrict__ z, const size_t n ) {
+#ifdef _DEBUG
+#ifdef D_ALP_NO_STDIO
+ std::cout << "In OperatorNoFR::eWiseApply\n";
+#endif
+#endif
+ // NOTE: this variant is only active when the computation can be done using two buffers only
+
+ // local buffers
+ D2 right_buffer[ blocksize ];
+ D3 result_buffer[ blocksize ];
+
+ // blockwise application
+ size_t i = 0;
+ while( i + blocksize <= n ) {
+
+ // load into buffers
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ right_buffer[ b ] = static_cast< D2 >( y[ i ] );
+ result_buffer[ b ] = static_cast< D3 >( x[ i ] );
+ }
+
+ // rewind source and output
+ i -= blocksize;
+
+ // operate within buffer
+ for( size_t b = 0; b < blocksize; ++b ) {
+ OP::foldl( &( result_buffer[ b ] ), &( right_buffer[ b ] ) );
+ }
+
+ // write back result
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ z[ i ] = static_cast< OutputType >( result_buffer[ b ] );
+ }
+ }
+
+ // direct application for remainder
+ for( ; i < n; ++i ) {
+ right_buffer[ 0 ] = static_cast< D2 >( y[ i ] );
+ result_buffer[ 0 ] = static_cast< D3 >( x[ i ] );
+ OP::foldl( result_buffer, right_buffer );
+ z[ i ] = static_cast< OutputType >( result_buffer[ 0 ] );
+ }
+ }
+ };
+
+ /**
+ * A class capable of adding an out-of-place \a eWiseApply function for an
+	 * operator that, given its domains, is not fold-left capable \em and not
+ * fold-right capable.
+ *
+ * If the given operator is not fold-left and not fold-right capable, then
+ * both OperatorFR and OperatorNoFR have not yet added an eWiseApply
+ * implementation. However, if there was already an in-place foldr or an
+ * in-place foldl available, then this class will add no new functions to
+ * the resulting operator.
+	 * In that case, a specialisation of this class provides an out-of-place
+	 * eWiseApply implementation that relies on OperatorBase::apply instead.
+ *
+ * @tparam OP The generic operator implementation.
+ * @tparam guard This typename is void if and only if there is already an
+ * in-place eWiseApply defined by the base OperatorNoFR
+ * class or by the OperatorFR class. In this case, this
+ * class does not add any new public methods.
+ * If it is not void, then this class defines an
+ * out-of-place eWiseApply function.
+ *
+ * \note This specific class corresponds to the \a guard variable equal to
+ * \a void.
+ *
+ * @see Operator for full details.
+ * @see OperatorNoFR for additional functions exposed to the resulting
+ * operator.
+ * @see OperatorFL for additional functions exposed to the resulting
+ * operator.
+ * @see OperatorFR for additional functions exposed to the resulting
+ * operator and an alternative way of providing a more
+ * efficient eWiseApply.
+ * @see OperatorBase for additional functions exposed to the resulting
+ * operator.
+ */
+ template< typename OP, typename guard = void, enum Backend implementation = config::default_backend >
+ class OperatorNoFRFL : public OperatorNoFR< OP > {};
+
+ /**
+ * A class that adds an out-of-place \a eWiseApply function for an operator
+ * that, given its domains, is not fold-left capable \em and not fold-right
+ * capable.
+ *
+ * Contains further specialisations for an operator that is not fold-left,
+ * capable \em and not fold-right capable. This means we have to supply an
+ * eWiseApply function that uses the normal OperatorBase::apply function,
+ * and thus uses three buffers instead of the two buffers required by its
+ * in-place counterparts.
+ *
+ * @tparam OP The generic operator implementation.
+ *
+ * @see Operator for full details.
+ * @see OperatorNoFR for additional functions exposed to the resulting
+ * operator.
+ * @see OperatorFL for additional functions exposed to the resulting
+ * operator.
+ * @see OperatorFR for additional functions exposed to the resulting
+ * operator and an alternative way of providing a more
+ * efficient eWiseApply.
+ * @see OperatorBase for additional functions exposed to the resulting
+ * operator and the OperatorBase::apply function this
+ * class will use.
+ */
+ template< typename OP >
+ class OperatorNoFRFL< OP,
+ typename std::enable_if< ( ! ( OP::has_foldl ) || ! ( std::is_same< typename OP::left_type, typename OP::result_type >::value ) ) &&
+ ( ! ( OP::has_foldr ) || ! ( std::is_same< typename OP::right_type, typename OP::result_type >::value ) ) >::type > : public OperatorNoFR< OP > {
+
+ private:
+ public:
+ typedef typename OperatorBase< OP >::D1 D1;
+ typedef typename OperatorBase< OP >::D2 D2;
+ typedef typename OperatorBase< OP >::D3 D3;
+ static constexpr size_t blocksize = OperatorBase< OP >::blocksize;
+
+ /** \anchor OperatorNoFRFLeWiseApply
+ *
+ * Standard out-of-place element-wise apply function. Calculates
+ * \f$\forall\ i \in \{ 0, 1, \ldots, n - 1 \}, \f$
+ * \f$ z_i = x_i \odot y_i \f$.
+ *
+ * This is the non-public variant that operates on raw arrays.
+ *
+ * @tparam InputType1 The type of elements in \a x.
+ * @tparam InputType2 The type of elements in \a y.
+ * @tparam OutputType The type of elements in \a z.
+ *
+ * If \a InputType1 and \a D1 are not the same, then the existing data in
+ * \a x will be cast to \a D1 prior to application of this operator.
+ * If \a InputType2 and \a D2 are not the same, then the existing data in
+ * \a y will be cast to \a D2 prior to application of this operator.
+ * If \a OutputType and \a D3 are not the same, then the results of
+ * applications of this operator are cast to \a OutputType prior to
+ * writing them back to \a z.
+ *
+	 * \note The GraphBLAS can explicitly control all \em three of these
+ * casting behaviours via alp::descriptors::no_casting.
+ *
+ * \warning With the in-place variants of this code, unwanted behaviour
+ * cannot be prevented by use of alp::descriptors::no_casting.
+ * Therefore the current implementation only calls the in-place
+ * variants when \a D1 equals \a D3 (for foldl-based in-place),
+ * or when \a D2 equals \a D3 (for foldr-based ones).
+ *
+ * @param[in] x The left-hand side input data. The memory range starting
+ * at \a x and ending at \a x + n (exclusive) may not
+ * overlap with the memory area starting at \a z and ending
+ * at \a z + n (exclusive).
+ * @param[in] y The right-hand side input data. The memory range starting
+ * at \a y and ending at \a y + n (exclusive) may not
+ * overlap with the memory area starting at \a z and ending
+ * at \a z + n.
+ * @param[out] z Where the map of \a x into \a y must be stored. This
+ * pointer is restricted in the sense that its memory may
+	 *             never overlap with those pointed to by \a x or \a y, as
+ * detailed above.
+ * @param[in] n How many data elements \a x, \a y, and \a z contain.
+ */
+ template< typename InputType1, typename InputType2, typename OutputType >
+ static void eWiseApply( const InputType1 * x, const InputType2 * y, OutputType * __restrict__ z, const size_t n ) {
+#ifdef _DEBUG
+#ifdef D_ALP_NO_STDIO
+ std::cout << "In OperatorNoFRFL::eWiseApply\n";
+#endif
+#endif
+ // NOTE: this variant is only active when the computation can NOT be done using two buffers only
+
+ // local buffers
+ D1 left_buffer[ blocksize ];
+ D2 right_buffer[ blocksize ];
+ D3 result_buffer[ blocksize ];
+
+ // blockwise application
+ size_t i = 0;
+ while( i + blocksize <= n ) {
+
+ // load into buffers
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ left_buffer[ b ] = static_cast< D1 >( x[ i ] );
+ right_buffer[ b ] = static_cast< D2 >( y[ i ] );
+ }
+
+ // rewind source and output
+ i -= blocksize;
+
+ // operate within buffer
+ for( size_t b = 0; b < blocksize; ++b ) {
+ OP::apply( &( left_buffer[ b ] ), &( right_buffer[ b ] ), &( result_buffer[ b ] ) );
+ }
+
+ // write back result
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ z[ i ] = static_cast< OutputType >( result_buffer[ b ] );
+ }
+ }
+
+ // direct application for remainder
+ for( ; i < n; ++i ) {
+ left_buffer[ 0 ] = static_cast< D1 >( x[ i ] );
+ right_buffer[ 0 ] = static_cast< D2 >( y[ i ] );
+ OP::apply( left_buffer, right_buffer, result_buffer );
+ z[ i ] = static_cast< OutputType >( result_buffer[ 0 ] );
+ }
+ }
+ };
+
+ /**
+ * This is the operator interface exposed to the GraphBLAS implementation.
+ *
+ * \warning Note that most GraphBLAS usage requires associative operators.
+ * While very easily possible to create non-associative operators
+ * using this interface, passing them to GraphBLAS functions,
+ * either explicitly or indirectly (by, e.g., including them in a
+ * alp::Monoid or alp::Semiring), will lead to undefined
+ * behaviour.
+ *
+ * This class wraps around a base operator of type \a OP we denote by
+ * \f$ \odot:\ D_1\times D_2 \to D_3 \f$.
+ *
+ * \parblock
+ * \par Base Operators
+ *
+ * The class \a OP is expected to define the following public function:
+ * - \a apply, which takes three pointers to parameters \f$ x \in D_1 \f$
+ * \f$ y \in D_2 \f$, and \f$ z \in D_3 \f$ and computes
+ * \f$ z = x \odot y \f$.
+ *
+ * It is also expected to define the following types:
+ * - \a left_type, which corresponds to \f$ D_1 \f$,
+ * - \a right_type, which corresponds to \f$ D_2 \f$,
+ * - \a result_type, which corresponds to \f$ D_3 \f$.
+ *
+ * It is also expected to define the following two public boolean fields:
+ * - \a has_foldr
+ * - \a has_foldl
+ *
+ * If \a has_foldr is \a true, then the class \a OP is expected to also
+ * define the function
+ * - foldr, which takes two pointers to parameters \f$ x \in D_1 \f$
+ * and \f$ z \in D_2 \subseteq D_3 \f$ and stores in \a z the result of
+ * \f$ x \odot z \f$.
+ *
+	 * If \a has_foldl is \a true, then the class \a OP is expected to also
+ * define the function
+ * - foldl, which takes two pointers to parameters
+ * \f$ z \in D_1 \subseteq D_3 \f$ and \f$ y \in D_2 \f$ and stores in
+ * \a z the result of \f$ z \odot y \f$.
+ *
+ * For examples of these base operators, see alp::operators::internal::max
+ * or alp::operators::internal::mul. An example of a full implementation,
+ * in this case for numerical addition, is the following:
+ *
+ * \snippet internalops.hpp Example Base Operator Implementation
+ *
+ * \note GraphBLAS users should never call these functions directly. This
+ * documentation is provided for developers to understand or extend
+ * the current implementation, for example to include new operators.
+ *
+ * \warning When calling these functions directly, note that the pointers
+ * to the memory areas are declared using the \em restrict key
+ * word. One of the consequences is that all pointers given in a
+ * single call may never refer to the same memory area, or
+	 *          undefined behaviour is invoked.
+ *
+ * \endparblock
+ *
+ * \parblock
+ * \par The exposed GraphBLAS Operator Interface
+ *
+ * The Base Operators as illustrated above are wrapped by this class to
+	 * provide a more convenient API. It translates the functionality of any Base
+ * Operator and exposes the following interface instead:
+ *
+ * -# apply, which takes three parameters \f$ x, y, z \f$ of arbitrary
+ * types and computes \f$ z = x \odot y \f$ after performing any
+ * casting if required.
+ * -# foldr, which takes two parameters \f$ x, z \f$ of arbitrary types
+ * and computes \f$ z = x \odot z \f$ after performing any casting if
+ * required.
+ * -# foldl, which takes two parameters \f$ z, y \f$ of arbitrary types
+ * and computes \f$ z = z \odot y \f$ after performing any casting if
+ * required.
+ * -# eWiseApply, which takes three pointers to arrays \f$ x, y, z \f$
+ * and a size \a n. The arrays can correspond to elements of any type,
+ * all three with length at least \a n. For every i-th element of the
+ * three arrays, on the values \f$ x_i, y_i, z_i \f$, \f$ z_i \f$ will
+ * be set to \f$ x_i \odot y_i \f$.
+ * -# foldrArray, which takes a pointer to an array \f$ x \f$, a
+	 *    parameter \f$ z \f$ of arbitrary type, and a size \a n as parameters.
+ * The value \f$ z \f$ will be overwritten to \f$ x_i \odot z \f$ for
+ * each of the \f$ i \in \{ 0, 1, \ldots, n-1 \} \f$. The order of
+ * application, in the sense of which \f$ i \f$ are processed first,
+ * is undefined.
+ * -# foldlArray, which takes as parameters: \f$ z \f$ of arbitrary type,
+	 *    an array \f$ y \f$, and a size \a n. The value \f$ z \f$ will be
+ * overwritten to \f$ z \odot y_i \f$ for each of the
+ * \f$ i \in \{ 0, 1, \ldots, n-1 \} \f$. The order of application, in
+ * the sense of which \f$ i \f$ are processed first, is undefined.
+ * \endparblock
+ *
+ * \note This class only allows wrapping of stateless base operators. This
+ * GraphBLAS implementation in principle allows for stateful
+ * operators, though they must be provided by a specialised class
+ * which directly implements the above public interface.
+ *
+ * @see OperatorBase::apply
+ * @see OperatorFR::foldr
+ * @see OperatorFL::foldl
+ * @see \ref OperatorNoFRFLeWiseApply
+ * @see Operator::foldrArray
+ * @see Operator::foldlArray
+ *
+ * \parblock
+ * \par Providing New Operators
+ *
+ * New operators are easily added to this
+ * GraphBLAS implementation by providing a base operator and wrapping this
+ * class around it, as illustrated, e.g., by alp::operators::add as follows:
+ *
+ * \snippet ops.hpp Operator Wrapping
+ *
+	 * These need to be compatible with the GraphBLAS type traits, specifically,
+	 * the #is_operator template. To ensure this, a specialisation of it must be
+	 * provided:
+ *
+ * \snippet ops.hpp Operator Type Traits
+ * \endparblock
+ */
+ template< typename OP, enum Backend implementation = config::default_backend >
+ class Operator : public OperatorNoFRFL< OP > {
+
+ private:
+ public:
+ /** The maximum block size when vectorising this operation. */
+ static constexpr size_t blocksize = OperatorBase< OP >::blocksize;
+
+ /** The left-hand side input domain of this operator. */
+ typedef typename OperatorBase< OP >::D1 D1;
+
+ /** The right-hand side input domain of this operator. */
+ typedef typename OperatorBase< OP >::D2 D2;
+
+ /** The output domain of this operator. */
+ typedef typename OperatorBase< OP >::D3 D3;
+
+ /**
+ * Reduces a vector of type \a InputType into a value in \a IOType
+ * by repeated application of this operator. The \a IOType is cast
+ * into \a D3 prior reduction. The \a InputType is cast into \a D1
+ * during reduction. The final result is cast to IOType after
+ * reduction. The reduction happens `right-to-left'.
+ *
+		 * This implementation relies on the \a foldr, whether it be a
+ * true in-place or emulated version.
+ *
+ * @param[in,out] out On input, the initial value to be used for
+ * reduction. On output, all elements of \a x
+ * have been applied to \a out.
+ * @param[in] x A vector of size \a n with elements of type \a left_type.
+ * @param[in] n A positive integer (can be 0).
+ */
+ template< typename IOType, typename InputType >
+ static void foldrArray( const InputType * __restrict__ const x, IOType & out, const size_t n ) {
+ // prepare scalar buffer
+ D3 reduced = static_cast< D3 >( out );
+ // prepare vectorisation buffer
+ D1 left_buffer[ blocksize ];
+ // blockwise application
+ size_t i = n - 1;
+ while( i - blocksize + 1 < n ) {
+ // load into buffer
+ for( size_t b = 0; b < blocksize; --i, ++b ) {
+ left_buffer[ b ] = static_cast< D1 >( x[ i ] );
+ }
+ // do reduce
+ for( size_t b = 0; b < blocksize; ++b ) {
+ OP::foldr( &( left_buffer[ b ] ), &reduced );
+ }
+ }
+ // direct application for remainder
+ for( ; i < n; --i ) {
+ left_buffer[ 0 ] = static_cast< D1 >( x[ i ] );
+ OP::foldr( left_buffer, &reduced );
+ }
+ // write out
+ out = static_cast< IOType >( reduced );
+ }
+
+ /**
+ * Reduces a vector of type \a InputType into a value in \a IOType
+ * by repeated application of this operator. The \a IOType is cast
+ * into \a D3 prior reduction. The \a InputType is cast into \a D2
+ * during reduction. The final result is cast to IOType after
+ * reduction. The reduction happens `left-to-right'.
+ *
+		 * This implementation relies on the \a foldl, whether it be a
+ * true in-place or emulated version.
+ *
+ * @param[in,out] out On input, the initial value to be used for
+ * reduction. On output, all elements of \a x
+ * have been applied to \a out.
+		 * @param[in] x A vector of size \a n with elements of type \a right_type.
+ * @param[in] n A positive integer (can be 0).
+ */
+ template< typename IOType, typename InputType >
+ static void foldlArray( IOType & out, const InputType * __restrict__ const x, const size_t n ) {
+ // prepare scalar buffer
+ D3 reduced = static_cast< D3 >( out );
+ // prepare vectorisation buffer
+ D2 right_buffer[ blocksize ];
+ // blockwise application
+ size_t i = 0;
+ while( i + blocksize <= n ) {
+ // load into buffer
+ for( size_t b = 0; b < blocksize; ++i, ++b ) {
+ right_buffer[ b ] = static_cast< D2 >( x[ i ] );
+ }
+ // do reduce
+ for( size_t b = 0; b < blocksize; ++b ) {
+ OP::foldl( &reduced, &( right_buffer[ b ] ) );
+ }
+ }
+ // direct application for remainder
+ for( ; i < n; ++i ) {
+ right_buffer[ 0 ] = static_cast< D2 >( x[ i ] );
+ OP::foldl( &reduced, right_buffer );
+ }
+ // write out
+ out = static_cast< IOType >( reduced );
+ }
+ };
+
+ } // namespace internal
+
+ } // namespace operators
+
+} // namespace alp
+
+#endif // _H_ALP_INTERNAL_OPERATORS_BASE
+
diff --git a/include/alp/base/internalrels.hpp b/include/alp/base/internalrels.hpp
new file mode 100644
index 000000000..4ca19a996
--- /dev/null
+++ b/include/alp/base/internalrels.hpp
@@ -0,0 +1,835 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author D. G. Spampinato
+ * @date 2nd of November, 2022
+ */
+
+#ifndef _H_ALP_INTERNAL_RELATIONS_BASE
+#define _H_ALP_INTERNAL_RELATIONS_BASE
+
+#include
+
+#include
+#include
+
+#include "internalops.hpp"
+
+
+namespace alp {
+
+ namespace relations {
+
+ /** Core implementations of the standard relations in #alp::relations. */
+ namespace internal {
+
+ /**
+ * Standard less-than (\a lt) operator.
+ *
+ * Assumes native availability of operator< on the given data types
+ * or assumes that the relevant operators are properly overloaded.
+ *
+ * Assumes that \a lt is a strict total order. Non-standard/non-matching
+ * data types or non-standard (overloaded) \a operator< should
+ * therefore be used with caution.
+ *
+ * @tparam SET The input data type.
+ */
+ template< typename SET, enum Backend implementation = config::default_backend >
+ class lt {
+
+ public:
+ /** Alias to the domain data type. */
+ typedef SET domain;
+
+ /** Alias to the codomain data type. */
+ typedef SET codomain;
+
+ /**
+ * Whether this relation is \em reflexive; that is,
+ * for all \a a in \a SET, \f$ a < a \f$.
+ */
+ static constexpr bool is_reflexive = false;
+
+ /**
+ * Whether this relation is \em irreflexive; that is,
+ * for all \a a in \a SET, not \f$ a < a \f$.
+ */
+ static constexpr bool is_irreflexive = true;
+
+ /**
+ * Whether this relation is \em symmetric; that is,
+ * for all \a a, \a b in \a SET,
+ * if \f$ a < b \f$ then \f$ b < a \f$.
+ */
+ static constexpr bool is_symmetric = false;
+
+ /**
+ * Whether this relation is \em antisymmetric; that is,
+ * for all \a a, \a b in \a SET, if \f$ a < b \f$ and
+ * \f$ b < a \f$ then \f$ a = b \f$.
+ */
+ static constexpr bool is_antisymmetric = true;
+
+ /**
+ * Whether this relation is \em transitive; that is,
+ * for all \a a, \a b, \a c in \a SET, if \f$ a < b \f$ and
+ * \f$ b < c \f$ then \f$ a < c \f$.
+ */
+ static constexpr bool is_transitive = true;
+
+ /**
+ * Whether this relation is \em connected (or total); that is,
+ * for all \a a, \a b in \a SET, if \f$ a \neq b \f$ then
+ * either \f$ a < b \f$ or \f$ b < a \f$.
+ */
+ static constexpr bool is_connected = true;
+
+ /**
+				 * Whether this relation is strongly connected;
+ * that is,
+ * for all \a a, \a b in \a SET,
+ * either \f$ a < b \f$ or \f$ b < a \f$.
+ */
+ static constexpr bool is_strongly_connected = false;
+
+ /**
+				 * This function checks if a < b.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ *
+ * \warning Passing invalid pointers will result in UB.
+ */
+ static bool check( const domain * const a,
+ const codomain * const b
+ ) {
+ return *a < *b;
+ }
+ };
+
+ /**
+ * Standard greater-than (\a gt) operator.
+ *
+ * Assumes native availability of \a operator> on the given data types
+ * or assumes that the relevant operators are properly overloaded.
+ *
+ * Assumes that \a gt is a strict total order. Non-standard/non-matching
+ * data types or non-standard (overloaded) \a operator> should
+ * therefore be used with caution.
+ *
+ * @tparam SET The input data type.
+ */
+ template< typename SET, enum Backend implementation = config::default_backend >
+ class gt {
+
+ public:
+ /** Alias to the domain data type. */
+ typedef SET domain;
+
+ /** Alias to the codomain data type. */
+ typedef SET codomain;
+
+ /**
+ * Whether this relation is \em reflexive; that is,
+ * for all \a a in \a SET, \f$ a > a \f$.
+ */
+ static constexpr bool is_reflexive = false;
+
+ /**
+ * Whether this relation is \em irreflexive; that is,
+ * for all \a a in \a SET, not \f$ a > a \f$.
+ */
+ static constexpr bool is_irreflexive = true;
+
+ /**
+ * Whether this relation is \em symmetric; that is,
+ * for all \a a, \a b in \a SET,
+ * if \f$ a > b \f$ then \f$ b > a \f$.
+ */
+ static constexpr bool is_symmetric = false;
+
+ /**
+ * Whether this relation is \em antisymmetric; that is,
+ * for all \a a, \a b in \a SET, if \f$ a > b \f$ and
+ * \f$ b > a \f$ then \f$ a = b \f$.
+ */
+ static constexpr bool is_antisymmetric = true;
+
+ /**
+ * Whether this relation is \em transitive; that is,
+ * for all \a a, \a b, \a c in \a SET, if \f$ a > b \f$ and
+ * \f$ b > c \f$ then \f$ a > c \f$.
+ */
+ static constexpr bool is_transitive = true;
+
+ /**
+ * Whether this relation is \em connected (or total); that is,
+ * for all \a a, \a b in \a SET, if \f$ a \neq b \f$ then
+ * either \f$ a > b \f$ or \f$ b > a \f$.
+ */
+ static constexpr bool is_connected = true;
+
+ /**
+				 * Whether this relation is strongly connected;
+ * that is,
+ * for all \a a, \a b in \a SET,
+ * either \f$ a > b \f$ or \f$ b > a \f$.
+ */
+ static constexpr bool is_strongly_connected = false;
+
+ /**
+				 * This function checks if a > b.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ *
+ * \warning Passing invalid pointers will result in UB.
+ */
+ static bool check( const domain * const a,
+ const codomain * const b
+ ) {
+ return *a > *b;
+ }
+ };
+
+ /**
+ * Standard equal (\a eq) relation.
+ *
+ * Assumes native availability of ALP internal operator \a less_than
+ * forming an equivalence relation on SET. Non-standard/non-matching
+ * data types should therefore be used with caution.
+ *
+ * @tparam SET The input data type.
+ */
+ template< typename SET, enum Backend implementation = config::default_backend >
+ class eq {
+
+ public:
+ /** Alias to the domain data type. */
+ typedef SET domain;
+
+ /** Alias to the codomain data type. */
+ typedef SET codomain;
+
+ /**
+ * Whether this relation is \em reflexive; that is,
+ * for all \a a in \a SET, \f$ a = a \f$.
+ */
+ static constexpr bool is_reflexive = true;
+
+ /**
+ * Whether this relation is \em irreflexive; that is,
+ * for all \a a in \a SET, not \f$ a = a \f$.
+ */
+ static constexpr bool is_irreflexive = false;
+
+ /**
+ * Whether this relation is \em symmetric; that is,
+ * for all \a a, \a b in \a SET,
+ * if \f$ a = b \f$ then \f$ b = a \f$.
+ */
+ static constexpr bool is_symmetric = true;
+
+ /**
+ * Whether this relation is \em antisymmetric; that is,
+ * for all \a a, \a b in \a SET, if \f$ a = b \f$ and
+ * \f$ b = a \f$ then \f$ a = b \f$.
+ */
+ static constexpr bool is_antisymmetric = true;
+
+ /**
+ * Whether this relation is \em transitive; that is,
+ * for all \a a, \a b, \a c in \a SET, if \f$ a = b \f$ and
+ * \f$ b = c \f$ then \f$ a = c \f$.
+ */
+ static constexpr bool is_transitive = true;
+
+ /**
+ * Whether this relation is \em connected; that is,
+ * for all \a a, \a b in \a SET, if \f$ a \neq b \f$ then
+ * either \f$ a = b \f$ or \f$ b = a \f$.
+ */
+ static constexpr bool is_connected = false;
+
+ /**
+ * Whether this relation is strongly connected (or total);
+ * that is,
+ * for all \a a, \a b in \a SET,
+ * either \f$ a = b \f$ or \f$ b = a \f$.
+ */
+ static constexpr bool is_strongly_connected = false;
+
+ /**
+				 * This function checks if a == b.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ *
+ * \warning Passing invalid pointers will result in UB.
+ */
+ static bool check( const domain * const a,
+ const codomain * const b
+ ) {
+ bool check;
+ operators::internal::template equal<
+ SET, SET, bool, implementation
+ >::apply( a, b, &check );
+ return check;
+ }
+ };
+
+ /**
+ * Standard not-equal (\a neq) operator.
+ *
+ * Assumes availability of ALP internal operator \a not_equal.
+ *
+ * While \a not_equal does not require to form an order or an
+ * equivalence relation on SET, the formed relation is still assumed
+ * to be irreflexive, symmetric, and connected. Non-standard/non-matching
+ * data types should therefore be used with caution.
+ *
+ * @tparam SET The input data type.
+ */
+ template< typename SET, enum Backend implementation = config::default_backend >
+ class neq {
+
+ public:
+ /** Alias to the domain data type. */
+ typedef SET domain;
+
+ /** Alias to the codomain data type. */
+ typedef SET codomain;
+
+ /**
+ * Whether this relation is \em reflexive; that is,
+ * for all \a a in \a SET, \f$ a \neq a \f$.
+ */
+ static constexpr bool is_reflexive = false;
+
+ /**
+ * Whether this relation is \em irreflexive; that is,
+ * for all \a a in \a SET, not \f$ a \neq a \f$.
+ */
+ static constexpr bool is_irreflexive = true;
+
+ /**
+ * Whether this relation is \em symmetric; that is,
+ * for all \a a, \a b in \a SET,
+ * if \f$ a \neq b \f$ then \f$ b \neq a \f$.
+ */
+ static constexpr bool is_symmetric = true;
+
+ /**
+ * Whether this relation is \em antisymmetric; that is,
+ * for all \a a, \a b in \a SET, if \f$ a \neq b \f$ and
+ * \f$ b \neq a \f$ then \f$ a = b \f$.
+ */
+ static constexpr bool is_antisymmetric = false;
+
+ /**
+ * Whether this relation is \em transitive; that is,
+ * for all \a a, \a b, \a c in \a SET, if \f$ a \neq b \f$ and
+ * \f$ b \neq c \f$ then \f$ a \neq c \f$.
+ */
+ static constexpr bool is_transitive = false;
+
+ /**
+ * Whether this relation is \em connected; that is,
+ * for all \a a, \a b in \a SET, if \f$ a \neq b \f$ then
+ * either \f$ a \neq b \f$ or \f$ b \neq a \f$.
+ */
+ static constexpr bool is_connected = true;
+
+ /**
+ * Whether this relation is strongly connected (or total);
+ * that is,
+ * for all \a a, \a b in \a SET,
+ * either \f$ a \neq b \f$ or \f$ b \neq a \f$.
+ */
+ static constexpr bool is_strongly_connected = false;
+
+ /**
+				 * This function checks if a != b.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ *
+ * \warning Passing invalid pointers will result in UB.
+ */
+ static bool check( const domain * const a,
+ const codomain * const b
+ ) {
+ bool check;
+ operators::internal::template not_equal<
+ SET, SET, bool, implementation
+ >::apply( a, b, &check );
+ return check;
+ }
+ };
+
+ /**
+ * Standard less-than-or-equal (\a le) operator.
+ *
+ * Assumes native availability of \a operator<= on the given data types
+ * or assumes that the relevant operators are properly overloaded.
+ *
+ * Assumes that \a le is a total order. Non-standard/non-matching
+ * data types or non-standard (overloaded) \a operator<= should
+ * therefore be used with caution.
+ *
+ * @tparam SET The input data type.
+ */
+ template< typename SET, enum Backend implementation = config::default_backend >
+ class le {
+
+ public:
+ /** Alias to the domain data type. */
+ typedef SET domain;
+
+ /** Alias to the codomain data type. */
+ typedef SET codomain;
+
+ /**
+ * Whether this relation is \em reflexive; that is,
+ * for all \a a in \a SET, \f$ a \le a \f$.
+ */
+ static constexpr bool is_reflexive = true;
+
+ /**
+ * Whether this relation is \em irreflexive; that is,
+ * for all \a a in \a SET, not \f$ a \le a \f$.
+ */
+ static constexpr bool is_irreflexive = false;
+
+ /**
+ * Whether this relation is \em symmetric; that is,
+ * for all \a a, \a b in \a SET,
+ * if \f$ a \le b \f$ then \f$ b \le a \f$.
+ */
+ static constexpr bool is_symmetric = false;
+
+ /**
+ * Whether this relation is \em antisymmetric; that is,
+ * for all \a a, \a b in \a SET, if \f$ a \le b \f$ and
+ * \f$ b \le a \f$ then \f$ a = b \f$.
+ */
+ static constexpr bool is_antisymmetric = true;
+
+ /**
+ * Whether this relation is \em transitive; that is,
+ * for all \a a, \a b, \a c in \a SET, if \f$ a \le b \f$ and
+ * \f$ b \le c \f$ then \f$ a \le c \f$.
+ */
+ static constexpr bool is_transitive = true;
+
+ /**
+ * Whether this relation is \em connected; that is,
+ * for all \a a, \a b in \a SET, if \f$ a \neq b \f$ then
+ * either \f$ a \le b \f$ or \f$ b \le a \f$.
+ */
+ static constexpr bool is_connected = true;
+
+ /**
+ * Whether this relation is strongly connected (or total);
+ * that is,
+ * for all \a a, \a b in \a SET,
+ * either \f$ a \le b \f$ or \f$ b \le a \f$.
+ */
+ static constexpr bool is_strongly_connected = true;
+
+ /**
+				 * This function checks if a <= b.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ *
+ * \warning Passing invalid pointers will result in UB.
+ */
+ static bool check( const domain * const a,
+ const codomain * const b
+ ) {
+ return *a <= *b;
+ }
+ };
+
+ /**
+ * Standard greater-than-or-equal (\a ge) operator.
+ *
+ * Assumes native availability of \a operator>= on the given data types
+ * or assumes that the relevant operators are properly overloaded.
+ *
+ * Assumes that \a ge is a total order. Non-standard/non-matching
+ * data types or non-standard (overloaded) \a operator>= should
+ * therefore be used with caution.
+ *
+ * @tparam SET The input data type.
+ */
+ template< typename SET, enum Backend implementation = config::default_backend >
+ class ge {
+
+ public:
+ /** Alias to the domain data type. */
+ typedef SET domain;
+
+ /** Alias to the codomain data type. */
+ typedef SET codomain;
+
+ /**
+ * Whether this relation is \em reflexive; that is,
+ * for all \a a in \a SET, \f$ a \ge a \f$.
+ */
+ static constexpr bool is_reflexive = true;
+
+ /**
+ * Whether this relation is \em irreflexive; that is,
+ * for all \a a in \a SET, not \f$ a \ge a \f$.
+ */
+ static constexpr bool is_irreflexive = false;
+
+ /**
+ * Whether this relation is \em symmetric; that is,
+ * for all \a a, \a b in \a SET,
+ * if \f$ a \ge b \f$ then \f$ b \ge a \f$.
+ */
+ static constexpr bool is_symmetric = false;
+
+ /**
+ * Whether this relation is \em antisymmetric; that is,
+ * for all \a a, \a b in \a SET, if \f$ a \ge b \f$ and
+ * \f$ b \ge a \f$ then \f$ a = b \f$.
+ */
+ static constexpr bool is_antisymmetric = true;
+
+ /**
+ * Whether this relation is \em transitive; that is,
+ * for all \a a, \a b, \a c in \a SET, if \f$ a \ge b \f$ and
+ * \f$ b \ge c \f$ then \f$ a \ge c \f$.
+ */
+ static constexpr bool is_transitive = true;
+
+ /**
+ * Whether this relation is \em connected; that is,
+ * for all \a a, \a b in \a SET, if \f$ a \neq b \f$ then
+ * either \f$ a \ge b \f$ or \f$ b \ge a \f$.
+ */
+ static constexpr bool is_connected = true;
+
+ /**
+ * Whether this relation is strongly connected (or total);
+ * that is,
+ * for all \a a, \a b in \a SET,
+ * either \f$ a \ge b \f$ or \f$ b \ge a \f$.
+ */
+ static constexpr bool is_strongly_connected = true;
+
+ /**
+				 * This function checks if a >= b.
+ *
+ * @param[in] a The left-hand side input. Must be pre-allocated and initialised.
+ * @param[in] b The right-hand side input. Must be pre-allocated and initialised.
+ *
+ * \warning Passing invalid pointers will result in UB.
+ */
+ static bool check( const domain * const a,
+ const codomain * const b
+ ) {
+ return *a >= *b;
+ }
+ };
+
+ /**
+ * This class takes a generic operator implementation and exposes a more
+ * convenient test() function based on it. This function allows arbitrary
+ * data types being passed as parameters, and automatically handles any
+ * casting required for the raw operator.
+ *
+ * @tparam REL The generic operator implementation.
+ *
+ */
+ template< typename REL, enum Backend implementation = config::default_backend >
+ class RelationBase {
+
+ public:
+
+ /** The domain type. */
+ typedef typename REL::domain D1;
+
+ /** The codomain type. */
+ typedef typename REL::codomain D2;
+
+ /** @return Whether this relation is reflexive. */
+ static constexpr bool is_reflexive() {
+ return REL::is_reflexive;
+ }
+
+ /** @return Whether this relation is irreflexive. */
+ static constexpr bool is_irreflexive() {
+ return REL::is_irreflexive;
+ }
+
+ /** @return Whether this relation is symmetric. */
+ static constexpr bool is_symmetric() {
+ return REL::is_symmetric;
+ }
+
+ /** @return Whether this relation is antisymmetric. */
+ static constexpr bool is_antisymmetric() {
+ return REL::is_antisymmetric;
+ }
+
+ /** @return Whether this relation is transitive. */
+ static constexpr bool is_transitive() {
+ return REL::is_transitive;
+ }
+
+ /** @return Whether this relation is connected. */
+ static constexpr bool is_connected() {
+ return REL::is_connected;
+ }
+
+ /** @return Whether this relation is strongly connected. */
+ static constexpr bool is_strongly_connected() {
+ return REL::is_strongly_connected;
+ }
+
+ /**
+ * This function checks if \f$ x REL y \f$.
+ *
+ * @tparam InputType1 The type of the input parameter \a x.
+ * @tparam InputType2 The type of the input parameter \a y.
+ *
+ * \warning If \a InputType1 does not match \a D1 \em or \a InputType2 does
+ * not match \a D2, then input will be cast into temporary
+ * variables of the correct types.
+ *
+ * \note Best performance is thus only guaranteed when all domains match.
+ *
+ * @param[in] x The left-hand side input.
+ * @param[in] y The right-hand side input.
+ */
+ template< typename InputType1, typename InputType2 >
+ static bool check( const InputType1 &x, const InputType2 &y ) {
+ const D1 a = static_cast< D1 >( x );
+ const D2 b = static_cast< D2 >( y );
+ return REL::check( &a, &b );
+ }
+
+ /**
+ * This is the high-performance version of check() in the sense that no
+ * casting is required. This version will be automatically called whenever
+ * possible.
+ */
+ static bool check( const D1 &x, const D2 &y ) {
+ return REL::check( &x, &y );
+ }
+ };
+
+ /**
+ * This is the relation interface exposed to the ALP implementation.
+ *
+ * This class wraps around a base relation of type \a REL we denote by
+ * \f$ REL \subseteq D_1\times D_2 \f$.
+ *
+ * \parblock
+ * \par Base Operators
+ *
+ * The class \a REL is expected to define the following public function:
+ * - \a check, which takes two pointers to parameters \f$ a \in D_1 \f$
+ * and \f$ b \in D_2 \f$ and checks if
+ * \f$ a REL b \f$.
+ *
+ * It is also expected to define the following types:
+ * - \a domain, which corresponds to \f$ D_1 \f$,
+ * - \a codomain, which corresponds to \f$ D_2 \f$.
+ *
+ * It is also expected to define the following public boolean fields:
+ * - \a is_reflexive
+ * - \a is_irreflexive
+ * - \a is_symmetric
+ * - \a is_antisymmetric
+ * - \a is_transitive
+ * - \a is_connected
+ * - \a is_strongly_connected
+ *
+ * For an example of base relation, see alp::relations::internal::lt.
+ *
+ * \note ALP users should never access these classes directly. This
+ * documentation is provided for developers to understand or extend
+ * the current implementation, for example to include new relations.
+ *
+ * \endparblock
+ *
+ * \parblock
+ * \par The exposed GraphBLAS Relation Interface
+ *
+ * The Base Relations as illustrated above are wrapped by this class to
+	 * provide a more convenient API. It translates the functionality of any Base
+ * Relation and exposes the following interface instead:
+ *
+ * -# check, which takes two parameters \f$ a, b \f$ of arbitrary
+ * types and checks \f$ a REL b \f$ after performing any
+ * casting if required.
+ * \endparblock
+ *
+ * \note This class only allows wrapping of stateless base relations. This
+ * ALP implementation in principle allows for stateful
+ * relations, though they must be provided by a specialised class
+ * which directly implements the above public interface.
+ *
+ * @see RelationBase::check
+ *
+ * \parblock
+ * \par Providing New Relations
+ *
+ * New relations are easily added to this
+ * ALP implementation by providing a base relation and wrapping this
+ * class around it, as illustrated, e.g., by alp::relations::lt as follows:
+ *
+ * \snippet rels.hpp Relation Wrapping
+ *
+	 * This needs to be compatible with the ALP type traits, specifically,
+	 * the #is_relation template. To ensure this, a specialisation of it must be
+	 * provided:
+ *
+ * \snippet rels.hpp Relation Type Traits
+ * \endparblock
+ */
+ template< typename REL, enum Backend implementation = config::default_backend >
+ class Relation : public RelationBase< REL, implementation > {
+
+ // public:
+ // typedef typename RelationBase< REL, implementation >::D1 D1;
+ // typedef typename RelationBase< REL, implementation >::D2 D2;
+
+ };
+
+ /**
+ *
+ * @tparam REL The generic homogeneous relation.
+ *
+ * @see Relation
+ * @see RelationBase for additional functions exposed to the final relation.
+ */
+ template<
+ typename REL,
+ enum Backend implementation = config::default_backend,
+ std::enable_if_t<
+ std::is_same<
+ typename REL::domain,
+ typename REL::codomain
+ >::value
+ > * = nullptr
+ >
+ class HomogeneousRelation : public Relation< REL, implementation > {
+ };
+
+ } // namespace internal
+
+ } // namespace relations
+
+ template< typename Rel >
+ struct is_homogeneous_relation {
+ static const constexpr bool value = is_relation< Rel >::value
+ and std::is_same< typename Rel::D1, typename Rel::D2 >::value;
+ };
+
+ template< typename Rel >
+ struct is_reflexive {
+ static const constexpr bool value = is_homogeneous_relation< Rel >::value
+ and Rel::is_reflexive();
+ };
+
+ template< typename Rel >
+ struct is_irreflexive {
+ static const constexpr bool value = is_homogeneous_relation< Rel >::value
+ and Rel::is_irreflexive();
+ };
+
+ template< typename Rel >
+ struct is_symmetric {
+ static const constexpr bool value = is_homogeneous_relation< Rel >::value
+ and Rel::is_symmetric();
+ };
+
+ template< typename Rel >
+ struct is_antisymmetric {
+ static const constexpr bool value = is_homogeneous_relation< Rel >::value
+ and Rel::is_antisymmetric();
+ };
+
+ template< typename Rel >
+ struct is_transitive {
+ static const constexpr bool value = is_homogeneous_relation< Rel >::value
+ and Rel::is_transitive();
+ };
+
+ template< typename Rel >
+ struct is_connected {
+ static const constexpr bool value = is_homogeneous_relation< Rel >::value
+ and Rel::is_connected();
+ };
+
+ template< typename Rel >
+ struct is_strongly_connected {
+ static const constexpr bool value = is_homogeneous_relation< Rel >::value
+ and Rel::is_strongly_connected();
+ };
+
+ template< typename Rel >
+ struct is_asymmetric {
+ static const constexpr bool value = is_irreflexive< Rel >::value
+ and is_antisymmetric< Rel >::value;
+ };
+
+ template< typename Rel >
+ struct is_partial_order {
+ static const constexpr bool value = is_reflexive< Rel >::value
+ and is_antisymmetric< Rel >::value
+ and is_transitive< Rel >::value;
+ };
+
+ template< typename Rel >
+ struct is_strict_partial_order {
+ static const constexpr bool value = is_asymmetric< Rel >::value
+ and is_transitive< Rel >::value;
+ };
+
+ template< typename Rel >
+ struct is_total_order {
+ static const constexpr bool value = is_partial_order< Rel >::value
+ and is_strongly_connected< Rel >::value;
+ };
+
+ template< typename Rel >
+ struct is_strict_total_order {
+ static const constexpr bool value = is_strict_partial_order< Rel >::value
+ and is_connected< Rel >::value;
+ };
+
+ template< typename Rel >
+ struct is_equivalence_relation {
+ static const constexpr bool value = is_reflexive< Rel >::value
+ and is_symmetric< Rel >::value
+ and is_transitive< Rel >::value;
+ };
+
+} // namespace alp
+
+#endif // _H_ALP_INTERNAL_RELATIONS_BASE
+
diff --git a/include/alp/base/io.hpp b/include/alp/base/io.hpp
new file mode 100644
index 000000000..926b4b7b6
--- /dev/null
+++ b/include/alp/base/io.hpp
@@ -0,0 +1,624 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 21st of February, 2017
+ */
+
+#ifndef _H_ALP_IO_BASE
+#define _H_ALP_IO_BASE
+
+#include
+#include
+#include
+
+#include "matrix.hpp"
+#include "scalar.hpp"
+#include "vector.hpp"
+
+
+namespace alp {
+
+ /**
+ * \defgroup IO Data Ingestion and Extraction.
+ * Provides functions for putting user data into opaque GraphBLAS objects,
+ * and provides functions for extracting data from opaque GraphBLAS objects.
+ *
+ * The GraphBLAS operates on opaque data objects. Users can input data using
+ * alp::buildVector and/or alp::buildMatrixUnique. This group provides free
+ * functions that automatically dispatch to those variants.
+ *
+ * The standard output methods are provided by alp::Vector::cbegin and
+ * alp::Vector::cend, and similarly for alp::Matrix. Iterators provide
+ * parallel output (see #IOMode for a discussion on parallel versus
+ * sequential IO).
+ *
+ * Sometimes it is desired to have direct access to a GraphBLAS memory
+ * area, and to have that memory available even after the GraphBLAS
+ * context has been closed (via alp::finalize). This functionality is
+ * provided by alp::pin_memory.
+ *
+ * @{
+ */
+
+ template< typename D, typename Structure, typename View, typename ImfR, typename ImfC, enum Backend backend >
+ size_t nrows( const Matrix< D, Structure, Density::Dense, View, ImfR, ImfC, backend > & A ) noexcept;
+
+ template< typename D, typename Structure, typename View, typename ImfR, typename ImfC, enum Backend backend >
+ size_t ncols( const Matrix< D, Structure, Density::Dense, View, ImfR, ImfC, backend > & A ) noexcept;
+
+ template< typename D, typename Structure, typename View, typename ImfR, typename ImfC, enum Backend backend >
+ std::pair< size_t, size_t > dims( const Matrix< D, Structure, Density::Dense, View, ImfR, ImfC, backend > & A ) noexcept;
+
+ /**
+ * Request the size (dimension) of a given Vector.
+ */
+ template<
+ typename DataType, typename DataStructure, typename View,
+ typename ImfR, typename ImfC, Backend backend
+ >
+ size_t size(
+ const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x
+ ) noexcept {
+
+#ifndef NDEBUG
+ const bool selected_backend_does_not_support_size_for_vector = false;
+ assert( selected_backend_does_not_support_size_for_vector );
+#endif
+ (void) x;
+ return SIZE_MAX;
+ }
+
+	/**
+	 * Request the number of nonzeroes in a given Vector.
+	 *
+	 * This base variant is a fall-back: it asserts in debug builds and
+	 * returns SIZE_MAX, signalling the selected backend provides no
+	 * specialisation.
+	 */
+	template<
+		typename DataType, typename DataStructure, typename View,
+		typename ImfR, typename ImfC, Backend backend
+	>
+	size_t nnz(
+		const Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x
+	) noexcept {
+
+#ifndef NDEBUG
+		const bool selected_backend_does_not_support_nnz_for_vector = false;
+		assert( selected_backend_does_not_support_nnz_for_vector );
+#endif
+		(void) x;
+		return SIZE_MAX;
+	}
+
+	/**
+	 * Retrieve the number of nonzeroes contained in this matrix.
+	 *
+	 * @returns The number of nonzeroes the current matrix contains.
+	 *
+	 * \parblock
+	 * \par Performance semantics.
+	 *        -# This function constitutes \f$ \Theta(1) \f$ work.
+	 *        -# This function allocates no additional dynamic memory.
+	 *        -# This function uses \f$ \mathcal{O}(1) \f$ memory
+	 *           beyond that which was already used at function entry.
+	 *        -# This function will move
+	 *           \f$ \mathit{sizeof}( size\_t ) \f$
+	 *           bytes of memory.
+	 * \endparblock
+	 */
+	template<
+		typename DataType, typename Structure, typename View,
+		typename ImfR, typename ImfC, Backend backend
+	>
+	size_t nnz(
+		const Matrix< DataType, Structure, Density::Dense, View, ImfR, ImfC, backend > &A
+	) noexcept {
+
+#ifndef NDEBUG
+		const bool selected_backend_does_not_support_nnz_for_matrix = false;
+		assert( selected_backend_does_not_support_nnz_for_matrix );
+#endif
+		(void) A;
+		return SIZE_MAX;
+	}
+
+ /**
+ * Clears all elements from the given vector \a x.
+ *
+ * At the end of this operation, the number of nonzero elements in this vector
+ * will be zero. The size of the vector remains unchanged.
+ */
+ template<
+ typename DataType, typename DataStructure, typename View,
+ typename ImfR, typename ImfC, Backend backend
+ >
+ RC clear(
+ Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x
+ ) noexcept {
+ (void) x;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Resizes the Scalar to have at least the given number of nonzeroes.
+ * The contents of the scalar are not retained.
+ */
+ template<
+ typename InputType, typename InputStructure,
+ typename length_type, Backend backend
+ >
+ RC resize( Scalar< InputType, InputStructure, backend > &s, const length_type new_nz ) {
+ (void) s;
+ (void) new_nz;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Resizes the vector to have at least the given number of nonzeroes.
+ * The contents of the vector are not retained.
+ */
+ template<
+ typename InputType, typename InputStructure, typename View,
+ typename ImfR, typename ImfC,
+ typename length_type, Backend backend
+ >
+ RC resize(
+ Vector< InputType, InputStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
+ const length_type new_nz
+ ) noexcept {
+ (void) x;
+ (void) new_nz;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Resizes the matrix to have at least the given number of nonzeroes.
+ * The contents of the matrix are not retained.
+ */
+ template<
+ typename InputType, typename InputStructure, typename InputView,
+ typename InputImfR, typename InputImfC, Backend backend
+ >
+ RC resize(
+ Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A,
+ const size_t new_nz
+ ) noexcept {
+ (void) A;
+ (void) new_nz;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Sets all elements of a Vector to the given value.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename DataType, typename DataStructure, typename View,
+ typename ImfR, typename ImfC,
+ typename T, typename ValStructure,
+ Backend backend
+ >
+ RC set(
+ Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
+ const Scalar< T, ValStructure, backend > val,
+ const std::enable_if_t<
+ !alp::is_object< DataType >::value &&
+ !alp::is_object< T >::value
+ > * const = nullptr
+ ) {
+ (void) x;
+ (void) val;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Sets the element of a given Vector at a given position to a given value.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename DataType, typename DataStructure, typename View,
+ typename ImfR, typename ImfC,
+ typename T, typename ValStructure,
+ Backend backend
+ >
+ RC setElement(
+ Vector< DataType, DataStructure, Density::Dense, View, ImfR, ImfC, backend > &x,
+ const Scalar< T, ValStructure, backend > val,
+ const size_t i,
+ const std::enable_if_t<
+ !alp::is_object< DataType >::value &&
+ !alp::is_object< T >::value
+ > * const = nullptr
+ ) {
+ (void) x;
+ (void) val;
+ (void) i;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Sets all elements of the output matrix to the values of the input matrix.
+ * C = A
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView,
+ typename OutputImfR, typename OutputImfC,
+ typename InputType, typename InputStructure, typename InputView,
+ typename InputImfR, typename InputImfC,
+ Backend backend
+ >
+ RC set(
+ Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+ const Matrix< InputType, InputStructure, Density::Dense, InputView, InputImfR, InputImfC, backend > &A
+ ) noexcept {
+ (void) C;
+ (void) A;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Sets all elements of the given matrix to the value of the given scalar.
+ * C = val
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename OutputType, typename OutputStructure, typename OutputView,
+ typename OutputImfR, typename OutputImfC,
+ typename InputType, typename InputStructure,
+ Backend backend
+ >
+ RC set(
+ Matrix< OutputType, OutputStructure, Density::Dense, OutputView, OutputImfR, OutputImfC, backend > &C,
+ const Scalar< InputType, InputStructure, backend > &val
+ ) noexcept {
+ (void) C;
+ (void) val;
+ return UNSUPPORTED;
+ }
+
+ /**
+ * Constructs a dense vector from a container of exactly alp::size(x)
+ * elements. This function aliases to the buildVector routine that takes
+ * an accumulator, using alp::operators::right_assign (thus overwriting
+ * any old contents).
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType, typename fwd_iterator,
+ Backend backend, typename Coords
+ >
+ RC buildVector(
+ internal::Vector< InputType, backend > &x,
+ fwd_iterator start, const fwd_iterator end,
+ const IOMode mode
+ ) {
+ operators::right_assign< InputType > accum;
+ return buildVector< descr >( x, accum, start, end, mode );
+ }
+
+ /**
+ * Ingests possibly sparse input from a container to which iterators are
+ * provided. This function dispatches to the buildVector routine that
+ * includes an accumulator, here set to alp::operators::right_assign.
+ * Any existing values in \a x that overlap with newer values will hence
+ * be overwritten.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType,
+ class Merger = operators::right_assign< InputType >,
+ typename fwd_iterator1, typename fwd_iterator2,
+ Backend backend, typename Coords
+ >
+ RC buildVector(
+ internal::Vector< InputType, backend > &x,
+ fwd_iterator1 ind_start, const fwd_iterator1 ind_end,
+ fwd_iterator2 val_start, const fwd_iterator2 val_end,
+ const IOMode mode, const Merger &merger = Merger()
+ ) {
+ operators::right_assign< InputType > accum;
+ return buildVector< descr >( x, accum, ind_start, ind_end, val_start, val_end, mode, merger );
+ }
+
+ /**
+ * Ingests a set of nonzeroes into a given vector \a x.
+ *
+ * Old values will be overwritten. The given set of nonzeroes must not contain
+ * duplicate nonzeroes that should be stored at the same index.
+ *
+ * \warning Inputs with duplicate nonzeroes when passed into this function will
+ * invoke undefined behaviour.
+ *
+ * @param[in,out] x The vector where to ingest nonzeroes into.
+ * @param[in] ind_start Start iterator to the nonzero indices.
+ * @param[in] ind_end End iterator to the nonzero indices.
+ * @param[in] val_start Start iterator to the nonzero values.
+ * @param[in] val_end End iterator to the nonzero values.
+ * @param[in] mode Whether sequential or parallel ingestion is requested.
+ *
+ * The containers the two iterator pairs point to must contain an equal number
+ * of elements. Any pre-existing nonzeroes that do not overlap with any nonzero
+ * between \a ind_start and \a ind_end will remain unchanged.
+ *
+ * \parblock
+ * \par Performance semantics:
+ * A call to this function
+ * -# comprises \f$ \mathcal{O}( n ) \f$ work where \a n is the number of
+ * elements pointed to by the given iterator pairs. This work may be
+ * distributed over multiple user processes.
+ * -# results in at most \f$ n \mathit{sizeof}( T ) +
+ * n \mathit{sizeof}( U ) +
+ * n \mathit{sizeof}( \mathit{InputType} ) +
+ * 2 n \mathit{sizeof}( \mathit{bool} ) \f$
+ * bytes of data movement, where \a T and \a U are the underlying data
+ * types of the input iterators. These costs may be distributed over
+ * multiple user processes.
+ * -# inter-process communication costs are \f$ \mathcal{O}(n) g + l \f$.
+ * -# if the capacity of this vector is not large enough to hold \a n
+ * elements, a call to this function may allocate
+ * \f$ \mathcal{O}( n ) \f$
+ * new bytes of memory which \em may be distributed over multiple user
+ * processes.
+ * -# if the capacity of this vector is not large enough to hold \a n
+ * elements, a call to this function may result in system calls at any of
+ * the user processes.
+ * -# If the IOMode is sequential, then the work and data movement costs are
+ * incurred per user process and will not be distributed. In this
+ * case the inter-process communication costs will, however, be zero.
+ * -# if the IOMode is parallel, then a good implementation under a uniformly
+ * randomly distributed input incurs an inter-process communication cost
+ * of expected value \f$ n/p g + l \f$. The best-case inter-process cost
+ * is \f$ (p-1)g + l \f$.
+ * \endparblock
+ *
+ * @returns alp::SUCCESS When ingestion has completed successfully.
+ * @returns alp::ILLEGAL When a nonzero has an index larger than alp::size(x).
+	 * @returns alp::PANIC    If an unmitigable error has occurred during ingestion.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType,
+ class Merger = operators::right_assign< InputType >,
+ typename fwd_iterator1, typename fwd_iterator2,
+ Backend backend, typename Coords
+ >
+ RC buildVectorUnique(
+ internal::Vector< InputType, backend > &x,
+ fwd_iterator1 ind_start, const fwd_iterator1 ind_end,
+ fwd_iterator2 val_start, const fwd_iterator2 val_end,
+ const IOMode mode
+ ) {
+ return buildVector< descr | descriptors::no_duplicates >( x,
+ ind_start, ind_end,
+ val_start, val_end,
+ mode );
+ }
+
+ /**
+ * Assigns nonzeroes to the matrix from a coordinate format.
+ *
+ * Invalidates any prior existing content. Disallows different nonzeroes
+ * to have the same row and column coordinates; input must consist out of
+ * unique triples. See #buildMatrix for an alternate function that does
+ * not have these restrictions-- at the cost of lower performance.
+ *
+ * \warning Calling this function with duplicate input coordinates will
+ * lead to undefined behaviour.
+ *
+ * @tparam descr The descriptor used. The default is
+ * #alp::descriptors::no_operation, which means that
+ * no pre- or post-processing of input or input is
+ * performed.
+ * @tparam fwd_iterator1 The type of the row index iterator.
+ * @tparam fwd_iterator2 The type of the column index iterator.
+ * @tparam fwd_iterator3 The type of the nonzero value iterator.
+ * @tparam length_type The type of the number of elements in each iterator.
+ *
+ * The iterators will only be used to read from, never to assign to.
+ *
+ * @param[in] I A forward iterator to \a cap row indices.
+ * @param[in] J A forward iterator to \a cap column indices.
+ * @param[in] V A forward iterator to \a cap nonzero values.
+ * @param[in] nz The number of items pointed to by \a I, \a J, \em and \a V.
+ *
+ * @return alp::MISMATCH -# when an element from \a I dereferences to a value
+ * larger than the row dimension of this matrix, or
+ * -# when an element from \a J dereferences to a value
+ * larger than the column dimension of this matrix.
+ * When this error code is returned the state of this
+ * container will be as though this function was never
+ * called; however, the given forward iterators may
+ * have been copied and the copied iterators may have
+ * incurred multiple increments and dereferences.
+ * @return alp::OVERFLW When the internal data type used for storing the
+ * number of nonzeroes is not large enough to store
+ * the number of nonzeroes the user wants to assign.
+ * When this error code is returned the state of this
+ * container will be as though this function was never
+ * called; however, the given forward iterators may
+ * have been copied and the copied iterators may have
+ * incurred multiple increments and dereferences.
+ * @return alp::SUCCESS When the function completes successfully.
+ *
+ * \parblock
+ * \par Performance semantics.
+	 *        -# This function contains
+	 *           \f$ \Theta(\mathit{nz})+\mathcal{O}(m+n) \f$ amount of work.
+	 *        -# This function may dynamically allocate
+	 *           \f$ \Theta(\mathit{nz})+\mathcal{O}(m+n) \f$ bytes of memory.
+ * -# A call to this function will use \f$ \mathcal{O}(m+n) \f$ bytes
+ * of memory beyond the memory in use at the function call entry.
+ * -# This function will copy each input forward iterator at most
+ * \em once; the three input iterators \a I, \a J, and \a V thus
+	 *           may have exactly one copy each, meaning that all input may be
+	 *           traversed only once.
+	 *        -# Each of the at most three iterator copies will be incremented
+ * at most \f$ \mathit{nz} \f$ times.
+ * -# Each position of the each of the at most three iterator copies
+ * will be dereferenced exactly once.
+	 *        -# This function moves
+	 *           \f$ \Theta(\mathit{nz})+\mathcal{O}(m+n) \f$ bytes of data.
+ * -# This function will likely make system calls.
+ * \endparblock
+ *
+ * \warning This is an expensive function. Use sparingly and only when
+ * absolutely necessary.
+ *
+ * \note Streaming input can be implemented by supplying buffered
+ * iterators to this GraphBLAS implementation.
+ *
+ * \note The functionality herein described is exactly that of buildMatrix,
+ * though with stricter input requirements. These requirements allow
+ * much faster construction.
+ *
+ * \note No masked version of this variant is provided. The use of masks in
+ * matrix construction is costly and the user is referred to the
+ * costly buildMatrix() function instead.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType,
+ typename fwd_iterator1 = const size_t * __restrict__,
+ typename fwd_iterator2 = const size_t * __restrict__,
+ typename fwd_iterator3 = const InputType * __restrict__,
+ typename length_type = size_t,
+ Backend implementation = config::default_backend
+ >
+ RC buildMatrixUnique(
+ internal::Matrix< InputType, implementation > &A,
+ fwd_iterator1 I, fwd_iterator1 I_end,
+ fwd_iterator2 J, fwd_iterator2 J_end,
+ fwd_iterator3 V, fwd_iterator3 V_end,
+ const IOMode mode
+ ) {
+ // derive synchronized iterator
+ auto start = utils::makeSynchronized( I, J, V, I_end, J_end, V_end );
+ const auto end = utils::makeSynchronized( I_end, J_end, V_end, I_end, J_end, V_end );
+
+ // defer to other signature
+ return buildMatrixUnique< descr >( A, start, end, mode );
+ }
+
+ /**
+ * Alias that transforms a set of pointers and an array length to the
+ * buildMatrixUnique variant based on iterators.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType,
+ typename fwd_iterator1 = const size_t * __restrict__,
+ typename fwd_iterator2 = const size_t * __restrict__,
+ typename fwd_iterator3 = const InputType * __restrict__,
+ typename length_type = size_t,
+ Backend implementation = config::default_backend
+ >
+ RC buildMatrixUnique( internal::Matrix< InputType, implementation > &A,
+ fwd_iterator1 I, fwd_iterator2 J, fwd_iterator3 V,
+ const size_t nz, const IOMode mode
+ ) {
+ return buildMatrixUnique< descr >( A,
+ I, I + nz,
+ J, J + nz,
+ V, V + nz,
+ mode
+ );
+ }
+
+ /** Version of the above #buildMatrixUnique that handles \a NULL value pointers. */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType,
+ typename fwd_iterator1 = const size_t * __restrict__,
+ typename fwd_iterator2 = const size_t * __restrict__,
+ typename length_type = size_t,
+ Backend implementation = config::default_backend
+ >
+ RC buildMatrixUnique(
+ internal::Matrix< InputType, implementation > &A,
+ fwd_iterator1 I, fwd_iterator2 J,
+ const length_type nz, const IOMode mode
+ ) {
+ // derive synchronized iterator
+ auto start = utils::makeSynchronized( I, J, I + nz, J + nz );
+ const auto end = utils::makeSynchronized( I + nz, J + nz, I + nz, J + nz );
+
+ // defer to other signature
+ return buildMatrixUnique< descr >( A, start, end, mode );
+ }
+
+ /**
+ * Version of buildMatrixUnique that works by supplying a single iterator
+ * (instead of three).
+ *
+ * This is useful in cases where the input is given as a single struct per
+ * nonzero, whatever this struct may be exactly, as opposed to multiple
+ * containers for row indices, column indices, and nonzero values.
+ *
+ * This GraphBLAS implementation provides both input modes since which one is
+ * more appropriate (and performant!) depends mostly on how the data happens
+ * to be stored in practice.
+ *
+ * @tparam descr The currently active descriptor.
+ * @tparam InputType The value type the output matrix expects.
+ * @tparam fwd_iterator The iterator type.
+ * @tparam implementation For which backend a matrix is being read.
+ *
+ * The iterator \a fwd_iterator, in addition to being STL-compatible, must
+ * support the following three public functions:
+ * -# S fwd_iterator.i(); which returns the row index of the current
+ * nonzero;
+	 *   -# S fwd_iterator.j(); which returns the column index of the
+ * current nonzero;
+ * -# V fwd_iterator.v(); which returns the nonzero value of the
+ * current nonzero.
+ *
+ * It also must provide the following public typedefs:
+ * -# fwd_iterator::row_coordinate_type
+ * -# fwd_iterator::column_coordinate_type
+ * -# fwd_iterator::nonzero_value_type
+ *
+ * This means a specialised iterator is required for use with this function.
+ * See, for example, alp::utils::internal::MatrixFileIterator.
+ *
+ * @param[out] A The matrix to be filled with nonzeroes from \a start to
+ * \a end.
+ * @param[in] start Iterator pointing to the first nonzero to be added.
+ * @param[in] end Iterator pointing past the last nonzero to be added.
+ */
+ template<
+ Descriptor descr = descriptors::no_operation,
+ typename InputType, typename fwd_iterator,
+ Backend implementation = config::default_backend
+ >
+ RC buildMatrixUnique(
+ internal::Matrix< InputType, implementation > &A,
+ fwd_iterator start, const fwd_iterator end,
+ const IOMode mode
+ ) {
+ (void)A;
+ (void)start;
+ (void)end;
+ (void)mode;
+ return UNSUPPORTED;
+ }
+
+ /** @} */
+
+} // namespace alp
+
+#endif // end _H_ALP_IO_BASE
+
diff --git a/include/alp/base/matrix.hpp b/include/alp/base/matrix.hpp
new file mode 100644
index 000000000..605e23ec5
--- /dev/null
+++ b/include/alp/base/matrix.hpp
@@ -0,0 +1,487 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 10th of August
+ */
+
+#ifndef _H_ALP_MATRIX_BASE
+#define _H_ALP_MATRIX_BASE
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace alp {
+
+ namespace internal {
+ /**
+ * A GraphBLAS matrix.
+ *
+ * This is an opaque data type that implements the below functions.
+ *
+ * @tparam D The type of a nonzero element. \a D shall not be a GraphBLAS
+ * type.
+ * @tparam implementation Allows multiple backends to implement different
+ * versions of this data type.
+ *
+ * \warning Creating an alp::Matrix of other GraphBLAS types is
+ * not allowed.
+ * Passing a GraphBLAS type as template parameter will lead to
+ * undefined behaviour.
+ */
+ template< typename D, enum Backend implementation >
+ class Matrix {
+
+ typedef Matrix< D, implementation > self_type;
+
+ public :
+
+ /**
+ * A standard iterator for a GraphBLAS matrix.
+ *
+ * This iterator is used for data extraction only. Hence only this const
+ * version is specified.
+ *
+ * Dereferencing an iterator of this type that is not in end position yields
+ * a pair \f$ (c,v) \f$. The value \a v is of type \a D and corresponds to
+ * the value of the dereferenced nonzero.
+ * The value \a c is another pair \f$ (i,j) \f$. The values \a i and \a j
+ * are of type size_t and correspond to the coordinate of the
+ * dereferenced nonzero.
+ *
+ * \note `Pair' here corresponds to the regular std::pair.
+ *
+ * \warning Comparing two const iterators corresponding to different
+ * containers leads to undefined behaviour.
+ * \warning Advancing an iterator past the end iterator of the container
+ * it corresponds to, leads to undefined behaviour.
+ * \warning Modifying the contents of a container makes any use of any
+ * iterator derived from it incur invalid behaviour.
+ * \note These are standard limitations of STL iterators.
+ */
+ class const_iterator : public std::iterator< std::forward_iterator_tag, std::pair< std::pair< const size_t, const size_t >, const D >, size_t > {
+
+ public :
+
+ /** Standard equals operator. */
+ bool
+ operator==( const const_iterator & other ) const { (void)other; return false; }
+
+ /** @returns The negation of operator==(). */
+ bool operator!=( const const_iterator & other ) const {
+ (void)other;
+ return true;
+ }
+
+ /**
+ * Dereferences the current position of this iterator.
+ *
+ * @return If this iterator is valid and not in end position, this returns
+ * an std::pair with in its first field the position of the
+ * nonzero value, and in its second field the value of the nonzero.
+ * The position of a nonzero is another std::pair with both the
+ * first and second field of type size_t.
+ *
+ * \note If this iterator is invalid or in end position, the result is
+ * undefined.
+ */
+ std::pair< const size_t, const D > operator*() const {
+ return std::pair< const size_t, const D >();
+ }
+
+ /**
+ * Advances the position of this iterator by one.
+ *
+ * If the current position corresponds to the last element in the
+ * container, the new position of this iterator will be its end
+ * position.
+ *
+ * If the current position of this iterator is already the end
+ * position, this iterator will become invalid; any use of invalid
+ * iterators will lead to undefined behaviour.
+ *
+ * @return A reference to this iterator.
+ */
+ const_iterator & operator++() {
+ return *this;
+ }
+
+ }; // class const_iterator
+
+ /** The value type of elements stored in this matrix. */
+ typedef D value_type;
+
+ /**
+ * The main GraphBLAS matrix constructor.
+ *
+ * Matrix nonzeroes will be uninitialised after successful construction.
+ *
+ * Requesting a matrix with zero \a rows or \a columns will yield an empty
+ * matrix; i.e., it will be useless but will not result in an error.
+ *
+ * @param rows The number of rows in the new matrix.
+ * @param columns The number of columns in the new matrix.
+ *
+ * @return SUCCESS This function never fails.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This constructor completes in \f$ \Theta(1) \f$ time.
+ * -# This constructor will not allocate any new dynamic memory.
+ * -# This constructor will use \f$ \Theta(1) \f$ extra bytes of
+ * memory beyond that at constructor entry.
+ * -# This constructor incurs \f$ \Theta(1) \f$ data movement.
+ * -# This constructor \em may make system calls.
+ * \endparblock
+ *
+ * \warning Avoid the use of this constructor within performance critical
+ * code sections.
+ */
+ Matrix( const size_t rows, const size_t columns ) {
+ (void)rows;
+ (void)columns;
+ }
+
+ /**
+ * Copy constructor.
+ *
+ * @param other The matrix to copy.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * Allocates the same capacity as the \a other matrix, even if the
+ * actual number of nonzeroes contained in \a other is less.
+ * -# This constructor entails \f$ \Theta(\mathit{nz}) \f$ amount of
+ * work.
+ * -# This constructor allocates \f$ \Theta(\mathit{nz}) \f$ bytes
+ * of dynamic memory.
+ * -# This constructor incurs \f$ \Theta(\mathit{nz}) \f$ of data
+ * movement.
+ * -# This constructor \em may make system calls.
+ * \endparblock
+ *
+ * \warning Avoid the use of this constructor within performance critical
+ * code sections.
+ */
+ Matrix( const Matrix< D, implementation > & other ) {
+ (void)other;
+ }
+
+ /**
+ * Move constructor. This will make the new matrix equal to the given
+ * GraphBLAS matrix while destroying the given matrix.
+ *
+ * @param[in] other The GraphBLAS matrix to move to this new instance.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+ * -# This constructor will not allocate any new dynamic memory.
+ * -# This constructor will use \f$ \Theta(1) \f$ extra bytes of
+ * memory beyond that at constructor entry.
+ * -# This constructor will move \f$ \Theta(1) \f$ bytes of data.
+ * \endparblock
+ */
+ Matrix( self_type && other ) {
+ (void)other;
+ }
+
+ /**
+ * Matrix destructor.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This destructor entails \f$ \Theta(1) \f$ amount of work.
+ * -# This destructor will not perform any memory allocations.
+ * -# This destructor will use \f$ \mathcal{O}(1) \f$ extra bytes of
+ * memory beyond that at constructor entry.
+ * -# This destructor will move \f$ \Theta(1) \f$ bytes of data.
+ * -# This destructor makes system calls.
+ * \endparblock
+ *
+ * \warning Avoid calling destructors from within performance critical
+ * code sections.
+ */
+ ~Matrix() {}
+
+ /**
+ * Assigns nonzeroes to the matrix from a coordinate format.
+ *
+ * Any prior content may be combined with new input according to the
+ * user-supplied accumulator operator (\a accum).
+ *
+ * Input triplets need not be unique. Input triplets that are written to the
+ * same row and column coordinates will be combined using the supplied
+ * duplicate operator (\a dup).
+ *
+ * \note Note that \a dup and \a accum may differ. The duplicate operator is
+ * \em not applied to any pre-existing nonzero values.
+ *
+ * \note The order of application of the operators is undefined.
+ *
+ * The number of nonzeroes, after reduction by duplicate removals and after
+ * merger with the existing nonzero structure, must be equal or less than the
+ * space reserved during the construction of this matrix. The nonzeroes will
+ * not be stored in a fully sorted fashion-- it will be sorted column-wise,
+ * but within each column the order can be arbitrary.
+ *
+ * @tparam accum How existing entries of this matrix should be
+ * treated.
+ * The default is #alp::operators::right_assign, which
+ * means that any existing values are overwritten with
+ * the new values.
+ * @tparam dup How to handle duplicate entries. The default is
+ * #alp::operators::add, which means that duplicated
+ * values are combined by addition.
+ * @tparam descr The descriptor used. The default is
+ * #alp::descriptors::no_operation, which means that
+ * no pre- or post-processing of input or input is
+ * performed.
+ * @tparam fwd_iterator1 The type of the row index iterator.
+ * @tparam fwd_iterator2 The type of the column index iterator.
+ * @tparam fwd_iterator3 The type of the nonzero value iterator.
+ * @tparam length_type The type of the number of elements in each iterator.
+ * @tparam T The type of the supplied mask.
+ *
+ * \note By default, the iterator types are raw, unaliased, pointers.
+ *
+ * \warning This means that by default, input arrays are \em not
+ * allowed to overlap.
+ *
+ * Forward iterators will only be used to read from, never to assign to.
+ *
+ * \note It is therefore both legal and preferred to pass constant forward
+ * iterators, as opposed to mutable ones as \a I, \a J, and \a V.
+ *
+ * @param[in] I A forward iterator to \a cap row indices.
+ * @param[in] J A forward iterator to \a cap column indices.
+ * @param[in] V A forward iterator to \a cap nonzero values.
+ * @param[in] nz The number of items pointed to by \a I, \a J, \em and
+ * \a V.
+ * @param[in] mask An input element at coordinate \f$ (i,j) \f$ will only be
+ * added to this matrix if there exists a matching element
+ * \f$ \mathit{mask}_{ij} \f$ in the given \a mask that
+ * evaluates true. The matrix in \a mask must be
+ * of the same dimension as this matrix.
+ *
+ * @return alp::MISMATCH -# when an element from \a I dereferences to a value
+ * larger than the row dimension of this matrix, or
+ * -# when an element from \a J dereferences to a value
+ * larger than the column dimension of this matrix.
+ * When this error code is returned the state of this
+ * container will be as though this function was never
+ * called; however, the given forward iterators may
+ * have been copied and the copied iterators may have
+ * incurred multiple increments and dereferences.
+ * @return alp::OVERFLW When the internal data type used for storing the
+ * number of nonzeroes is not large enough to store
+ * the number of nonzeroes the user wants to assign.
+ * When this error code is returned the state of this
+ * container will be as though this function was never
+ * called; however, the given forward iterators may
+ * have been copied and the copied iterators may have
+ * incurred multiple increments and dereferences.
+ * @return alp::SUCCESS When the function completes successfully.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This function contains
+ * \f$ \Theta(\mathit{nz}\log\mathit{nz})+\mathcal{O}(m+n)) \f$
+ * amount of work.
+ * -# This function may dynamically allocate
+ * \f$ \Theta(\mathit{nz})+\mathcal{O}(m+n)) \f$ bytes of memory.
+ * -# A call to this function will use \f$ \mathcal{O}(m+n) \f$ bytes
+ * of memory beyond the memory in use at the function call entry.
+ * -# This function will copy each input forward iterator at most
+ * \em twice; the three input iterators \a I, \a J, and \a V thus
+ * may have exactly two copies each, meaning that all input may be
+ * traversed \em twice.
+ * -# Each of the at most six iterator copies will be incremented at
+ * most \f$ \mathit{nz} \f$ times.
+ * -# Each position of the each of the at most six iterator copies
+ * will be dereferenced exactly once.
+ * -# This function moves
+ * \f$ \Theta(\mathit{nz})+\mathcal{O}(m+n)) \f$ bytes of data.
+ * -# If the mask is nonempty, the performance costs of alp::eWiseMul
+ * on two matrix arguments must be added to the above costs.
+ * -# This function will likely make system calls.
+ * \endparblock
+ *
+ * \warning This is an extremely expensive function. Use sparingly and only
+ * when absolutely necessary.
+ *
+ * \note Streaming input can be implemented by supplying buffered
+ * iterators to this GraphBLAS implementation.
+ */
+ template< Descriptor descr = descriptors::no_operation,
+ template< typename, typename, typename > class accum = operators::right_assign,
+ template< typename, typename, typename > class dup = operators::add,
+ typename fwd_iterator1 = const size_t * __restrict__,
+ typename fwd_iterator2 = const size_t * __restrict__,
+ typename fwd_iterator3 = const D * __restrict__,
+ typename length_type = size_t,
+ typename T >
+ RC buildMatrix( const fwd_iterator1 I, const fwd_iterator2 J, const fwd_iterator3 V, const length_type nz, const Matrix< T, implementation > & mask ) {
+ (void)I;
+ (void)J;
+ (void)V;
+ (void)nz;
+ (void)mask;
+ return PANIC;
+ }
+
+ //@{
+ /**
+ * Provides the only mechanism to extract data from a GraphBLAS matrix.
+ *
+ * The order in which nonzero elements are returned is undefined.
+ *
+ * @return An iterator pointing to the first element of this matrix, if any;
+ * \em or an iterator in end position if this vector contains no
+ * nonzeroes.
+ *
+ * \note An `iterator in end position' compares equal to the const_iterator
+ * returned by cend().
+ *
+ * \parblock
+ * \par Performance semantics
+ * -# This function contains \f$ \mathcal{O}(1) \f$ work.
+ * -# This function is allowed to allocate dynamic memory.
+ * -# This function uses up to \f$ \mathcal{O}(1) \f$ more memory
+ * than already used by this application at entry.
+ * -# This function shall move at most \f$ \mathcal{O}(1) \f$ bytes
+ * of data.
+ * -# This function may make system calls.
+ * \endparblock
+ *
+ * \warning Avoid the use of this function within performance critical code
+ * sections.
+ *
+ * \note This function may make use of a const_iterator that is buffered,
+ * hence possibly causing its implicitly called constructor to
+ * allocate dynamic memory.
+ */
+ const_iterator cbegin() const {}
+
+ /**
+ * Same as cbegin().
+ * Since iterators are only supplied as a data extraction mechanism, there
+ * is no overloaded version of this function that returns a non-const
+ * iterator.
+ */
+ const_iterator begin() const {}
+ //@}
+
+ //@{
+ /**
+ * Indicates the end to the elements in this container.
+ *
+ * @return An iterator at the end position of this container.
+ *
+ * \parblock
+ * \par Performance semantics
+ * -# This function contains \f$ \mathcal{O}(1) \f$ work.
+ * -# This function is not allowed to allocate dynamic memory.
+ * -# This function uses up to \f$ \mathcal{O}(1) \f$ more memory
+ * than already used by this application at entry.
+ * -# This function shall move at most \f$ \mathcal{O}(1) \f$ bytes
+ * of data.
+ * -# This function shall \em not induce any system calls.
+ * \endparblock
+ *
+ * \note Even if cbegin() returns a buffered const_iterator that may require
+ * dynamic memory allocation and additional data movement, this
+ * specification disallows the same to happen for the construction of
+ * an iterator in end position.
+ */
+ const_iterator cend() const {}
+
+ /**
+ * Same as cend().
+ * Since iterators are only supplied as a data extraction mechanism, there
+ * is no overloaded version of this function that returns a non-const
+ * iterator.
+ */
+ const_iterator end() const {}
+ //@}
+
+ template< typename InputType, Backend backend >
+ RC clear( Matrix< InputType, backend > & A ) noexcept {
+ // this is the generic stub implementation
+ return UNSUPPORTED;
+ }
+ }; // class Matrix
+ } // namespace internal
+
+
+ template< typename T, typename Structure, enum Density density, typename View,
+ typename ImfR, typename ImfC, enum Backend backend >
+ class Matrix;
+
+ // These two comments are left here until a better place is found for them.
+ /**
+ * When a structured matrix instantiates a \em container it defines a new \em physical
+ * (concrete?) layout. This is characterized by an ALP container (aka an \a internal::Matrix) and a
+ * storage scheme that defines a unique interpretation of its content.
+ * The combination of the logical and physical layout of a structured matrix makes it
+ * possible to identify a precise mapping between an element in the structured matrix
+ * and a position within one or more 1/2D-arrays that store it.
+ */
+ //internal::Matrix< T, reference > * _container;
+
+ /**
+ * A container's storage scheme. \a storage_scheme is not exposed to the user as an option
+ * but can be defined by ALP at different points in the execution depending on the \a backend choice.
+ * For example, if the container is associated to an I/O matrix, with a reference backend
+ * it might be set to reflect the storage scheme of the user data as specified at buildMatrix.
+ * If \a backend is set to \a mlir then the scheme could be fixed by the JIT compiler to effectively
+ * support its optimization strategy.
+ * At construction time and until the moment the scheme decision is made it may be set to
+ * an appropriate default choice, e.g. if \a density is \a Density::Dense then
+ * \a Density::Dense::full could be used.
+ * \internal \todo Revisit this. The change of storage scheme type to enum (dense/sparse) and
+ * implementing storage mapping functions requires a change of this spec.
+ */
+ // Storage storage_scheme;
+
+ /**
+ * Check if type \a T is a Matrix.
+ */
+ template< typename T >
+ struct is_structured_matrix : std::false_type {};
+ template< typename T, typename Structure, enum Density density, typename View, typename ImfR, typename ImfC, enum Backend backend >
+ struct is_structured_matrix< Matrix< T, Structure, density, View, ImfR, ImfC, backend > > : std::true_type {};
+
+} // end namespace ``alp''
+
+#endif // end _H_ALP_MATRIX_BASE
diff --git a/include/alp/base/scalar.hpp b/include/alp/base/scalar.hpp
new file mode 100644
index 000000000..45a36c7d9
--- /dev/null
+++ b/include/alp/base/scalar.hpp
@@ -0,0 +1,229 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef _H_ALP_SCALAR_BASE
+#define _H_ALP_SCALAR_BASE
+
+#include //size_t
+#include
+
+#include
+#include
+#include
+#include
+
+
+namespace alp {
+
+ /**
+ * \brief An ALP scalar.
+ *
+ * This is an opaque data type for scalars.
+ *
+ * @tparam T The type of the vector elements. \a T shall not
+ * be an ALP type.
+ * @tparam Structure One of the structures. One of possible use cases
+ * for a structured scalar is a random structure.
+ * Depending on the backend implementation, this may mean,
+ * for example, randomizing the scalar value on each
+ * interaction with the scalar.
+ *
+ * \warning Creating an alp::Scalar of other ALP types is
+ * not allowed.
+ * Passing an ALP type as template parameter will lead to
+ * undefined behaviour.
+ *
+ */
+ template< typename T, typename Structure, enum Backend backend >
+ class Scalar {
+
+ public:
+ /** @see Vector::value_type. */
+ typedef T value_type;
+
+ /** @see Vector::lambda_reference */
+ typedef T& lambda_reference;
+
+ /**
+ * The default ALP scalar constructor.
+ *
+ * The constructed object will be uninitialised after successful construction.
+ *
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+ * -# This constructor may allocate \f$ \Theta(1) \f$ bytes
+ * of dynamic memory.
+ * -# This constructor will use \f$ \Theta(1) \f$ extra bytes of
+ * memory beyond that at constructor entry.
+ * -# This constructor incurs \f$ \Theta(1) \f$ data movement.
+ * -# This constructor \em may make system calls.
+ * \endparblock
+ *
+ */
+ Scalar() {}
+
+ /**
+ * The ALP scalar constructor for converting a reference to C/C++ scalar
+ * to ALP scalar.
+ *
+ * The constructed object will be initialized after successful construction.
+ *
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+ * -# This constructor may allocate \f$ \Theta(1) \f$ bytes
+ * of dynamic memory.
+ * -# This constructor will use \f$ \Theta(1) \f$ extra bytes of
+ * memory beyond that at constructor entry.
+ * -# This constructor incurs \f$ \Theta(1) \f$ data movement.
+ * -# This constructor \em may make system calls.
+ * \endparblock
+ *
+ * \warning This constructor saves the reference to the provided value.
+ * Therefore, the changes to the container or the value will
+ * be mirrored into each-other. For preserving the separation,
+ * use Scalar( const T ) version.
+ *
+ */
+ explicit Scalar( T &value ) {
+ (void)value;
+ }
+
+ /**
+ * The ALP scalar constructor for converting a C/C++ scalar to ALP scalar.
+ *
+ * The constructed object will be initialized after successful construction.
+ *
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+ * -# This constructor may allocate \f$ \Theta(1) \f$ bytes
+ * of dynamic memory.
+ * -# This constructor will use \f$ \Theta(1) \f$ extra bytes of
+ * memory beyond that at constructor entry.
+ * -# This constructor incurs \f$ \Theta(1) \f$ data movement.
+ * -# This constructor \em may make system calls.
+ * \endparblock
+ *
+ */
+ explicit Scalar( T value ) {
+ (void)value;
+ }
+
+ /**
+ * Copy constructor.
+ *
+ * @param other The scalar to copy. The initialization state of the copy
+ * reflects the state of \a other.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+ * -# This constructor allocates \f$ \Theta(1) \f$ bytes
+ * of dynamic memory.
+ * -# This constructor incurs \f$ \Theta(1) \f$ of data
+ * movement.
+ * -# This constructor \em may make system calls.
+ * \endparblock
+ *
+ */
+ Scalar( const Scalar &other ) noexcept {
+ (void)other;
+ }
+
+ /**
+ * Move constructor. The new scalar equals the given
+ * scalar. Invalidates the use of the input scalar.
+ *
+ * @param[in] other The ALP scalar to move to this new instance.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This constructor entails \f$ \Theta(1) \f$ amount of work.
+ * -# This constructor will not allocate any new dynamic memory.
+ * -# This constructor will use \f$ \Theta(1) \f$ extra bytes of
+ * memory beyond that at constructor entry.
+ * -# This constructor will move \f$ \Theta(1) \f$ bytes of data.
+ * \endparblock
+ */
+ Scalar( Scalar &&other ) noexcept {
+ (void)other;
+ }
+
+ /**
+ * Returns a lambda reference to the value of this Scalar. The user
+ * ensures that the requested reference only corresponds to a pre-existing
+ * nonzero in this scalar, or undefined behaviour will occur.
+ * This addresses the sparse specialization of scalars. In the dense
+ * context, scalar is considered to have a nonzero value \em iff initialized.
+ *
+ * A lambda reference to the value of this scalar is only valid when used
+ * inside a lambda function evaluated via alp::eWiseLambda. Outside this
+ * scope the returned reference incurs undefined behaviour.
+ *
+ *
+ * \warning In parallel contexts the use of a returned lambda reference
+ * outside the context of an eWiseLambda will incur at least one of
+ * the following ill effects: it may
+ * -# fail outright,
+ * -# work on stale data,
+ * -# work on incorrect data, or
+ * -# incur high communication costs to guarantee correctness.
+ * In short, such usage causes undefined behaviour. Implementers are
+ * \em not advised to provide GAS-like functionality through this
+ * interface, as it invites bad programming practices and bad
+ * algorithm design decisions. This operator is instead intended to
+ * provide for generic BLAS0-type operations only.
+ *
+ * \note For I/O, use the iterator retrieved via cbegin() instead of
+ * relying on a lambda_reference.
+ *
+ * @return A lambda reference to the value of this scalar
+ *
+ * \par Example.
+ * See alp::eWiseLambda() for a practical and useful example.
+ *
+ * \warning There is no similar concept in the official GraphBLAS specs.
+ *
+ * @see lambda_reference For more details on the returned reference type.
+ * @see alp::eWiseLambda For one legal way in which to use the returned
+ * #lambda_reference.
+ */
+ lambda_reference operator*() noexcept {
+#ifndef _ALP_NO_EXCEPTIONS
+ assert( false ); // Requesting lambda reference of unimplemented Scalar backend.
+#endif
+ }
+
+ /** Returns a constant reference to the scalar value.
+ * See the non-constant variant for further details.
+ */
+ const lambda_reference operator*() const noexcept {
+#ifndef _ALP_NO_EXCEPTIONS
+ assert( false ); // Requesting lambda reference of unimplemented Scalar backend.
+#endif
+ }
+
+ }; // class Scalar
+
+} // namespace alp
+
+#endif // _H_ALP_SCALAR_BASE
diff --git a/include/alp/base/vector.hpp b/include/alp/base/vector.hpp
new file mode 100644
index 000000000..4ba5440a8
--- /dev/null
+++ b/include/alp/base/vector.hpp
@@ -0,0 +1,892 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 10th of August, 2016
+ */
+
+#ifndef _H_ALP_VECTOR_BASE
+#define _H_ALP_VECTOR_BASE
+
+#include //size_t
+#include //std::iterator
+#include
+#include //pair
+
+#include
+#include
+#include
+#include
+#include
+
+
+namespace alp {
+ namespace internal {
+ /**
+ * A GraphBLAS vector. This is an opaque data type that can be provided to
+ * any GraphBLAS function, such as, alp::eWiseMulAdd, for example.
+ *
+ * @tparam D The type of an element of this vector. \a D shall not be a
+ * GraphBLAS type.
+ * @tparam implementation Allows different backends to implement different
+ * versions of this data type.
+ *
+ * \warning Creating an alp::Vector of other GraphBLAS types is
+ * not allowed.
+ * Passing a GraphBLAS type as template parameter will lead to
+ * undefined behaviour.
+ *
+ * \note The implementation found in the same file as this documentation
+ * catches invalid backends only. This class should never compile.
+ *
+ * @see alp::Vector< D, reference > for an actual implementation example.
+ */
+ template< typename D, enum Backend implementation >
+ class Vector {
+
+ public :
+
+ /** The type of elements stored in this vector. */
+ typedef D value_type;
+
+ /**
+ * Defines a reference to a value of type \a D. This reference is only valid
+ * when used inside a lambda function that is passed to alp::eWiseLambda().
+ *
+ * \warning Any other use of this reference incurs undefined behaviour.
+ *
+ * \par Example.
+ * An example valid use:
+ * \code
+ * void f(
+ * Vector< D >::lambda_reference x,
+ * const Vector< D >::lambda_reference y,
+ * const Vector< D > &v
+ * ) {
+ * alp::eWiseLambda( [x,y](const size_t i) {
+ * x += y;
+ * }, v );
+ * }
+ * \endcode
+ * This code adds \a y to \a x for every element in \a v. For a more useful
+ * example, see alp::eWiseLambda.
+ *
+ * \warning Note that, unlike the above, this below code is illegal since it
+ * does not evaluate via a lambda passed to any of the above
+ * GraphBLAS lambda functions (such as alp::eWiseLambda).
+ * \code{.cpp}
+ * void f(
+ * Vector< D >::lambda_reference x,
+ * const Vector< D >::lambda_reference y
+ * ) {
+ * x += y;
+ * }
+ * \endcode
+ * Also this usage is illegal since it does not rely on any
+ * GraphBLAS-approved function listed above:
+ * \code{.cpp}
+ * void f(
+ * Vector< D >::lambda_reference x,
+ * const Vector< D >::lambda_reference y
+ * ) {
+ * std::function< void() > f =
+ * [x,y](const size_t i) {
+ * x += y;
+ * };
+ * f();
+ * }
+ * \endcode
+ *
+ * \warning There is no similar concept in the official GraphBLAS specs.
+ *
+ * @see alp::Vector::operator[]()
+ * @see alp::eWiseLambda
+ */
+ typedef D & lambda_reference;
+
+ /**
+ * A standard iterator for the Vector< D > class.
+ *
+ * This iterator is used for data extraction only. Hence only this const
+ * version is supplied.
+ *
+ * \warning Comparing two const iterators corresponding to different
+ * containers leads to undefined behaviour.
+ * \warning Advancing an iterator past the end iterator of the container
+ * it corresponds to leads to undefined behaviour.
+ * \warning Modifying the contents of a container makes any use of any
+ * iterator derived from it incur invalid behaviour.
+ * \note These are standard limitations of STL iterators.
+ */
+ class const_iterator : public std::iterator< std::forward_iterator_tag, std::pair< const size_t, const D >, size_t > {
+
+ public :
+
+ /** Standard equals operator. */
+ bool
+ operator==( const const_iterator & other ) const { (void)other; return false; }
+
+ /** @returns The negation of operator==(). */
+ bool operator!=( const const_iterator & other ) const {
+ (void)other;
+ return true;
+ }
+
+ /**
+ * Dereferences the current position of this iterator.
+ *
+ * @return If this iterator is valid and not in end position,
+ * this returns a new std::pair with in its first
+ * field the position of the nonzero value, and in its
+ * second field the value of the nonzero.
+ *
+ * \note If this iterator is invalid or in end position, the result is
+ * undefined.
+ */
+ std::pair< const size_t, const D > operator*() const {
+ return std::pair< const size_t, const D >();
+ }
+
+ /**
+ * Advances the position of this iterator by one.
+ *
+ * If the current position corresponds to the last element in the
+ * container, the new position of this iterator will be its end
+ * position.
+ *
+ * If the current position of this iterator is already the end
+ * position, this iterator will become invalid; any use of invalid
+ * iterators will lead to undefined behaviour.
+ *
+ * @return A reference to this iterator.
+ */
+ const_iterator & operator++() {
+ return *this;
+ }
+
+ }; // class const_iterator
+
+ /**
+ * The only way to create an empty GraphBLAS vector. The given dimension will
+ * be fixed throughout the lifetime of this container.
+ *
+ * The vector will be empty after successful construction.
+ *
+ * @param[in] n The dimension of this vector.
+ *
+ * \parblock
+ * \par Performance semantics
+ * -# This constructor completes in \f$ \mathcal{O}(n) \f$ time.
+ * -# This constructor allocates \f$ \mathcal{O}(n) \f$ bytes of
+ * dynamic memory.
+ * -# This constructor moves at most \f$ \mathcal{O}( n ) \f$ bytes
+ * of data.
+ * -# This constructor may make system calls.
+ * \endparblock
+ *
+ * \warning Avoid the use of this constructor within performance critical
+ * code sections.
+ */
+ Vector( const size_t n ) {
+ (void)n;
+ }
+
+ /**
+ * Move constructor.
+ *
+ * This will make the new vector equal the given GraphBLAS vector while
+ * destroying the supplied GraphBLAS vector.
+ *
+ * This function always succeeds and will not throw exceptions.
+ *
+ * @param[in] x The GraphBLAS vector to move to this new container.
+ *
+ * \parblock
+ * \par Performance semantics
+ * -# This constructor completes in \f$ \Theta(1) \f$ time.
+ * -# This constructor does not allocate new data on the heap.
+ * -# This constructor uses \f$ \mathcal{O}(1) \f$ more memory than
+ * already used by this application at constructor entry.
+ * -# This constructor incurs at most \f$ \mathcal{O}(1) \f$ bytes of
+ * data movement.
+ * \endparblock
+ */
+ Vector( Vector< D, implementation > &&x ) noexcept {
+ (void)x;
+ }
+
+ /**
+ * Move-from-temporary assignment.
+ *
+ * @param[in,out] x The temporary instance from which this instance shall
+ * take over its resources.
+ *
+ * After a call to this function, \a x shall correspond to an empty vector.
+ *
+ * \parblock
+ * \par Performance semantics
+ * -# This move assignment completes in \f$ \Theta(1) \f$ time.
+ * -# This move assignment may not make system calls.
+ * -# this move assignment moves \f$ \Theta(1) \f$ data only.
+ * \endparblock
+ */
+ Vector< D, implementation >& operator=( Vector< D, implementation > &&x ) noexcept {
+ (void)x;
+ return *this;
+ }
+
+ /**
+ * Default destructor. Frees all associated memory areas.
+ *
+ * \parblock
+ * \par Performance semantics
+ * -# This destructor contains \f$ \mathcal{O}(n) \f$ work, where
+ * \f$ n \f$ is the capacity of this vector.
+ * -# This destructor is only allowed to free memory, not allocate.
+ * -# This destructor uses \f$ \mathcal{O}(1) \f$ more memory than
+ * already used by this application at entry.
+ * -# This destructor shall move at most \f$ \mathcal{O}(n) \f$ bytes
+ * of data.
+ * -# This destructor will make system calls.
+ * \endparblock
+ *
+ * \warning Avoid the use of this destructor within performance critical
+ * code sections.
+ *
+ * \note Destruction of this GraphBLAS container is the only way to
+ * guarantee that any underlying dynamically allocated memory is
+ * freed.
+ */
+ ~Vector() {}
+
+ //@{
+ /**
+ * Provides the only mechanism to extract data from this GraphBLAS vector.
+ *
+ * The order in which nonzero elements are returned is undefined.
+ *
+ * @return An iterator pointing to the first element of this vector, if any;
+ * \em or an iterator in end position if this vector contains no
+ * nonzeroes.
+ *
+ * \note An `iterator in end position' compares equal to the const_iterator
+ * returned by cend().
+ *
+ * \parblock
+ * \par Performance semantics
+ * -# This function contains \f$ \mathcal{O}(1) \f$ work.
+ *        -# This function is allowed to allocate dynamic memory.
+ * -# This function uses up to \f$ \mathcal{O}(1) \f$ more memory
+ * than already used by this application at entry.
+ * -# This function shall move at most \f$ \mathcal{O}(1) \f$ bytes
+ * of data.
+ * -# This function may make system calls.
+ * \endparblock
+ *
+ * \warning Avoid the use of this function within performance critical code
+ * sections.
+ *
+ * \note This function may make use of a const_iterator that is buffered,
+ * hence possibly causing its implicitly called constructor to
+ * allocate dynamic memory.
+ */
+ const_iterator cbegin() const {}
+
+ /**
+ * Same as cbegin().
+ * Since iterators are only supplied as a data extraction mechanism, there
+ * is no overloaded version of this function that returns a non-const
+ * iterator.
+ */
+ const_iterator begin() const {}
+ //@}
+
+ //@{
+ /**
+ * Indicates the end to the elements in this container.
+ *
+ * @return An iterator at the end position of this container.
+ *
+ * \parblock
+ * \par Performance semantics
+ * -# This function contains \f$ \mathcal{O}(1) \f$ work.
+ *        -# This function is not allowed to allocate dynamic memory.
+ * -# This function uses up to \f$ \mathcal{O}(1) \f$ more memory
+ * than already used by this application at entry.
+ * -# This function shall move at most \f$ \mathcal{O}(1) \f$ bytes
+ * of data.
+ * -# This function shall \em not induce any system calls.
+ * \endparblock
+ *
+ * \note Even if cbegin() returns a buffered const_iterator that may require
+ * dynamic memory allocation and additional data movement, this
+ * specification disallows the same to happen for the construction of
+ * an iterator in end position.
+ */
+ const_iterator cend() const {}
+
+ /**
+ * Same as cend().
+ * Since iterators are only supplied as a data extraction mechanism, there
+ * is no overloaded version of this function that returns a non-const
+ * iterator.
+ */
+ const_iterator end() const {}
+ //@}
+
+ /**
+ * Copy from raw user-supplied data into a vector.
+ *
+ * This is the dense unmasked variant.
+ *
+ * @tparam descr The pre-processing descriptor to use.
+ * @tparam fwd_iterator The type of input iterator. By default, this will be
+ * a raw \em unaliased pointer.
+ * @tparam Accum The accumulator type used to merge incoming new
+ * elements with existing contents, if any.
+ *
+ * @param[in] accum The accumulator used to merge incoming new elements with
+ * existing content, if any.
+ * @param[in] start The iterator to the first element that should be copied
+ * into this GraphBLAS vector.
+ * @param[in] end Iterator shifted exactly one past the last element that
+ * should be copied into this GraphBLAS vector.
+ * @param[out] npos The last iterator position after exiting this function.
+ * In most cases this will equal \a end. This parameter is
+ * optional.
+ *
+ * The first element from \a start will be copied into the element with index
+ * \f$ 0 \f$ in this vector. The \f$ k \f$-th element will be copied into
+ * the element with index \f$ k - 1 \f$. The iterator \a start will be
+ * incremented along with \f$ k \f$ until it compares equal to \a end, or
+ * until it has been incremented \a n times, where \a n is the dimension of
+ * this vector. In the latter case, any remaining values are ignored.
+ *
+ * @return alp::SUCCESS This function always succeeds.
+ *
+ * \note The default accumulator expects the input values to be of the
+ *       same type as nonzero elements in this vector, and will cause old
+ * values to be overwritten by the incoming new values.
+ *
+ * \note Previous contents of the vector are retained. If these are to be
+ * cleared first, see clear(). The default accumulator is NOT an
+ * alternative since any pre-existing values corresponding to entries
+ * in the mask that evaluate to false will be retained.
+ *
+ * \note The parameter \a n can be used to ingest only a subset of a larger
+ * data structure pointed to by \a start. At the end of the call, \a
+ * start will then not be equal to \a end, but instead point to the
+ * first element of the remainder of the larger data structure.
+ *
+ * \par Valid descriptors
+ * alp::descriptors::no_operation, alp::descriptors::no_casting.
+ *
+ * \note Invalid descriptors will be ignored.
+ *
+ * If alp::descriptors::no_casting is specified, then 1) the first domain of
+ * \a accum must match the type of \a val, 2) the second domain must match
+ * the type \a D of nonzeroes in this vector, and 3) the third domain must
+ * match \a D. If one of these is not true, the code shall not compile.
+ *
+ * \parblock
+ * \par Performance semantics
+ * If the capacity of this container is sufficient to perform the
+ * requested operation, then:
+ * -# This function contains \f$ \Theta(n) \f$ work.
+ * -# This function will take at most \f$ \Theta(1) \f$ memory beyond
+ * the memory already used by the application before the call to
+ * this function.
+ * -# This function moves at most \f$ n ( 2\mathit{sizeof}(D) +
+ * \mathit{sizeof}(\mathit{bool}) ) + \mathcal{O}(1) \f$ bytes of
+ * data.
+ * \endparblock
+ *
+ * \parblock
+ * \par Performance exceptions
+ * If the capacity of this container at function entry is insufficient
+ * to perform the requested operation, then, in addition to the above:
+ *       -# this function allocates \f$ \Theta(n) \f$ bytes of memory.
+ * -# this function frees \f$ \mathcal{O}(n) \f$ bytes of memory.
+ * -# this function will make system calls.
+ * \endparblock
+ *
+ * \note An implementation may ensure that at object construction the
+ * capacity is maximised. In that case, the above performance
+ * exceptions will never come to pass.
+ *
+ * @see alp::buildVector for the GraphBLAS standard dispatcher to this
+ * function.
+ */
+ template< Descriptor descr = descriptors::no_operation, class Accum = typename operators::right_assign< D, D, D >, typename fwd_iterator = const D * __restrict__ >
+ RC build( const Accum & accum, const fwd_iterator start, const fwd_iterator end, fwd_iterator npos ) {
+ (void)accum;
+ (void)start;
+ (void)end;
+ (void)npos;
+ return PANIC;
+ }
+
+ /**
+ * Copy from raw user-supplied data into a vector.
+ *
+ * This is the sparse non-masked variant.
+ *
+ * @tparam descr The pre-processing descriptor to use.
+ * @tparam Accum The type of the operator used to combine newly input
+ * data with existing data, if any.
+ * @tparam ind_iterator The type of index input iterator. By default, this
+ * will be a raw \em unaliased pointer to elements of
+ * type \a size_t.
+ * @tparam nnz_iterator The type of nonzero input iterator. By default, this
+ * will be a raw \em unaliased pointer to elements of
+ * type \a D.
+ * @tparam Dup The type of operator used to combine any duplicate
+ * input values.
+ *
+ * @param[in] accum The operator to be used when writing back the result
+ * of data that was already in this container prior to
+ * calling this function.
+ * @param[in] ind_start The iterator to the first index value that should be
+ * added to this GraphBLAS vector.
+ * @param[in] ind_end Iterator corresponding to the end position of
+ * \a ind_start.
+ * @param[in] nnz_start The iterator to the first nonzero value that should
+ * be added to this GraphBLAS vector.
+ * @param[in] nnz_end Iterator corresponding to the end position of
+ * \a nnz_start.
+ * @param[in] dup The operator to be used when handling multiple
+ * nonzero values that are to be mapped to the same
+ * index position.
+ *
+ * The first element from \a nnz_start will be copied into this vector at
+ * the index corresponding to the first element from \a ind_start. Then,
+ * both nonzero and index value iterators advance to add the next input
+ * element and the process repeats until either of the input iterators
+ * reach \a nnz_end or \a ind_end, respectively.
+ * If at that point one of the iterators still has remaining elements, then
+ * those elements are ignored.
+ *
+ * @return alp::MISMATCH When attempting to insert a nonzero value at an
+ * index position that is larger or equal to the
+ * dimension of this vector. When this code is
+ * returned, the contents of this container are
+ * undefined.
+ * @return alp::SUCCESS When all elements are successfully assigned.
+ *
+ * \note The default accumulator expects \a D to be of the same type
+ * as nonzero elements of this operator, and will cause old
+ * values to be overwritten by the incoming new values.
+ *
+ * \note The default \a dup expects \a D to be of the same type as nonzero
+ * elements of this operator, and will cause duplicate values to be
+ * discarded in favour of the last seen value.
+ *
+ * \note Previous contents of the vector are retained. If these are to be
+ * cleared first, see clear(). The default accumulator is NOT an
+ * alternative since any pre-existing values corresponding to entries
+ * in the mask that evaluate to false will be retained.
+ *
+ * \par Valid descriptors
+ * alp::descriptors::no_operation, alp::descriptors::no_casting,
+ * alp::descriptors::no_duplicates.
+ *
+ * \note Invalid descriptors will be ignored.
+ *
+ * If alp::descriptors::no_casting is specified, then 1) the first domain of
+ * \a accum must match the type of \a D, 2) the second domain must match
+ * nnz_iterator::value_type, and 3) the third domain must match \a D. If one of
+ * these is not true, the code shall not compile.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This function contains \f$ \Theta(n) \f$ work.
+ * -# This function will take at most \f$ \Theta(1) \f$ memory beyond
+ * the memory already used by the application before the call to
+ * this function.
+ * -# This function moves at most \f$ n ( 2\mathit{sizeof}(D) +
+ * \mathit{sizeof}(\mathit{bool}) ) + \mathcal{O}(1) \f$ bytes of
+ * data.
+ * \endparblock
+ *
+ * \parblock
+ * \par Performance exceptions
+ * If the capacity of this container at function entry is insufficient
+ * to perform the requested operation, then, in addition to the above:
+ *       -# this function allocates \f$ \Theta(n) \f$ bytes of memory.
+ * -# this function frees \f$ \mathcal{O}(n) \f$ bytes of memory.
+ * -# this function will make system calls.
+ * \endparblock
+ *
+ * \note An implementation may ensure that at object construction the
+ * capacity is maximised. In that case, the above performance
+ * exceptions will never come to pass.
+ *
+ * @see alp::buildVector for the GraphBLAS standard dispatcher to this
+ * function.
+ */
+ template< Descriptor descr = descriptors::no_operation,
+ class Accum = operators::right_assign< D, D, D >,
+ typename ind_iterator = const size_t * __restrict__,
+ typename nnz_iterator = const D * __restrict__,
+ class Dup = operators::right_assign< D, D, D > >
+ RC build( const Accum & accum, const ind_iterator ind_start, const ind_iterator ind_end, const nnz_iterator nnz_start, const nnz_iterator nnz_end, const Dup & dup = Dup() ) {
+ (void)accum;
+ (void)ind_start;
+ (void)ind_end;
+ (void)nnz_start;
+ (void)nnz_end;
+ (void)dup;
+ return PANIC;
+ }
+
+ /**
+ * Copy from raw user-supplied data into a vector.
+ *
+ * This is the sparse masked variant.
+ *
+ * @tparam descr The pre-processing descriptor to use.
+ * @tparam mask_type The value type of the \a mask vector. This type is
+ * \em not required to be \a bool.
+ * @tparam Accum The type of the operator used to combine newly input
+ * data with existing data, if any.
+ * @tparam ind_iterator The type of index input iterator. By default, this
+ * will be a raw \em unaliased pointer to elements of
+ * type \a size_t.
+ * @tparam nnz_iterator The type of nonzero input iterator. By default, this
+ * will be a raw \em unaliased pointer to elements of
+ * type \a D.
+ * @tparam Dup The type of operator used to combine any duplicate
+ * input values.
+ *
+ * @param[in] mask An element is only added to this container if its
+ * index \f$ i \f$ has a nonzero at the same position
+ * in \a mask that evaluates true.
+ * @param[in] accum The operator to be used when writing back the result
+ * of data that was already in this container prior to
+ * calling this function.
+ * @param[in] ind_start The iterator to the first index value that should be
+ * added to this GraphBLAS vector.
+ * @param[in] ind_end Iterator corresponding to the end position of
+ * \a ind_start.
+ * @param[in] nnz_start The iterator to the first nonzero value that should
+ * be added to this GraphBLAS vector.
+ * @param[in] nnz_end Iterator corresponding to the end position of
+ * \a nnz_start.
+ * @param[in] dup The operator to be used when handling multiple
+ * nonzero values that are to be mapped to the same
+ * index position.
+ *
+ * The first element from \a nnz_start will be copied into this vector at
+ * the index corresponding to the first element from \a ind_start. Then,
+ * both nonzero and index value iterators advance to add the next input
+ * element and the process repeats until either of the input iterators
+ * reach \a nnz_end or \a ind_end, respectively.
+ * If at that point one of the iterators still has remaining elements, then
+ * those elements are ignored.
+ *
+ * @return alp::MISMATCH When attempting to insert a nonzero value at an
+ * index position that is larger or equal to the
+ * dimension of this vector. When this code is
+ * returned, the contents of this container are
+ * undefined.
+ * @return alp::SUCCESS When all elements are successfully assigned.
+ *
+ * \note The default accumulator expects \a D to be of the same type
+ * as nonzero elements of this operator, and will cause old
+ * values to be overwritten by the incoming new values.
+ *
+ * \note The default \a dup expects \a D to be of the same type as nonzero
+ * elements of this operator, and will cause duplicate values to be
+ * discarded in favour of the last seen value.
+ *
+ * \note Previous contents of the vector are retained. If these are to be
+ * cleared first, see clear(). The default accumulator is NOT an
+ * alternative since any pre-existing values corresponding to entries
+ * in the mask that evaluate to false will be retained.
+ *
+ * \par Valid descriptors
+ * alp::descriptors::no_operation, alp::descriptors::no_casting,
+ * alp::descriptors::invert_mask, alp::descriptors::no_duplicates.
+ *
+ * \note Invalid descriptors will be ignored.
+ *
+ * If alp::descriptors::no_casting is specified, then 1) the first domain of
+ * \a accum must match the type of \a D, 2) the second domain must match
+ * nnz_iterator::value_type, and 3) the third domain must match \a D. If one of
+ * these is not true, the code shall not compile.
+ *
+ * \parblock
+ * \par Performance semantics.
+ * -# This function contains \f$ \Theta(n) \f$ work.
+ * -# This function will take at most \f$ \Theta(1) \f$ memory beyond
+ * the memory already used by the application before the call to
+ * this function.
+ * -# This function moves at most \f$ n ( 2\mathit{sizeof}(D) +
+ * \mathit{sizeof}(\mathit{bool}) ) + \mathcal{O}(1) \f$ bytes of
+ * data.
+ * \endparblock
+ *
+ * \parblock
+ * \par Performance exceptions
+ * If the capacity of this container at function entry is insufficient
+ * to perform the requested operation, then, in addition to the above:
+ *       -# this function allocates \f$ \Theta(n) \f$ bytes of memory.
+ * -# this function frees \f$ \mathcal{O}(n) \f$ bytes of memory.
+ * -# this function will make system calls.
+ * \endparblock
+ *
+ * \note An implementation may ensure that at object construction the
+ * capacity is maximised. In that case, the above performance
+ * exceptions will never come to pass.
+ *
+ * @see alp::buildVector for the GraphBLAS standard dispatcher to this
+ * function.
+ */
+ template< Descriptor descr = descriptors::no_operation,
+ typename mask_type,
+ class Accum,
+ typename ind_iterator = const size_t * __restrict__,
+ typename nnz_iterator = const D * __restrict__,
+ class Dup = operators::right_assign< D, typename nnz_iterator::value_type, D > >
+ RC build( const Vector< mask_type, implementation > mask,
+ const Accum & accum,
+ const ind_iterator ind_start,
+ const ind_iterator ind_end,
+ const nnz_iterator nnz_start,
+ const nnz_iterator nnz_end,
+ const Dup & dup = Dup() ) {
+ (void)mask;
+ (void)accum;
+ (void)ind_start;
+ (void)ind_end;
+ (void)nnz_start;
+ (void)nnz_end;
+ (void)dup;
+ return PANIC;
+ }
+
+ /**
+ * Return the dimension of this vector.
+ *
+ * @tparam T The integral output type.
+ *
+ * @param[out] size Where to store the size of this vector.
+ * The initial value is ignored.
+ *
+ * @returns alp::SUCCESS When the function call completes successfully.
+ *
+ * \note This function cannot fail.
+ *
+ * \parblock
+ * \par Performance semantics
+ * This function
+ * -# contains \f$ \Theta(1) \f$ work,
+ * -# will not allocate new dynamic memory,
+ * -# will take at most \f$ \Theta(1) \f$ memory beyond the memory
+ * already used by the application before the call to this
+ * function.
+ * -# will move at most \f$ \mathit{sizeof}(T) +
+ * \mathit{sizeof}(\mathit{size\_t}) \f$ bytes of data.
+ * \endparblock
+ */
+ template< typename T >
+ RC size( T & size ) const {
+ (void)size;
+ return PANIC;
+ }
+
+ /**
+ * Return the number of nonzeroes in this vector.
+ *
+ * @tparam T The integral output type.
+ *
+ * @param[out] nnz Where to store the number of nonzeroes contained in this
+ * vector. Its initial value is ignored.
+ *
+ * @returns alp::SUCCESS When the function call completes successfully.
+ *
+ * \note This function cannot fail.
+ *
+ * \parblock
+ * \par Performance semantics
+ * This function
+ * -# contains \f$ \Theta(1) \f$ work,
+ * -# will not allocate new dynamic memory,
+ * -# will take at most \f$ \Theta(1) \f$ memory beyond the memory
+ * already used by the application before the call to this
+ * function.
+ * -# will move at most \f$ \mathit{sizeof}(T) +
+ * \mathit{sizeof}(\mathit{size\_t}) \f$ bytes of data.
+ * \endparblock
+ */
+ template< typename T >
+ RC nnz( T & nnz ) const {
+ (void)nnz;
+ return PANIC;
+ }
+
+ /**
+ * Returns a lambda reference to an element of this sparse vector.
+ *
+ * A lambda reference to an element of this vector is only valid when used
+ * inside a lambda function evaluated via alp::eWiseLambda. The lambda
+ * function is called for specific indices only-- that is, the GraphBLAS
+ * implementation decides at which elements to dereference this container.
+ * Outside this scope the returned reference incurs undefined behaviour.
+ *
+ * \warning In particular, for the given index \a i by the lambda function,
+ * it shall be \em illegal to refer to indices relative to that
+ * \a i; including, but not limited to, \f$ i+1 \f$, \f$ i-1 \f$, et
+ * cetera.
+ *
+ * \note As a consequence, this function cannot be used to perform stencil
+ * or halo based operations.
+ *
+ * If a previously non-existing entry of the vector is requested, a new
+ * nonzero is added at position \a i in this vector. The new element will
+ * have its initial value equal to the \em identity corresponding to the
+ * given monoid.
+ *
+ * \warning In parallel contexts the use of a returned lambda reference
+ * outside the context of an eWiseLambda will incur at least one of
+ * the following ill effects: it may
+ * -# fail outright,
+ * -# work on stale data,
+ * -# work on incorrect data, or
+ * -# incur high communication costs to guarantee correctness.
+ * In short, such usage causes undefined behaviour. Implementers are
+ * \em not advised to provide GAS-like functionality through this
+ * interface, as it invites bad programming practices and bad
+ * algorithm design decisions. This operator is instead intended to
+ * provide for generic BLAS1-type operations only.
+ *
+ * \note For I/O, use the iterator retrieved via cbegin() instead of
+ * relying on a lambda_reference.
+ *
+ * @param[in] i Which element to return a lambda reference of.
+ * @param[in] monoid Under which generalised monoid to interpret the
+ * requested \f$ i \f$th element of this vector.
+ *
+ * \note The \a monoid (or a ring) is required to be able to interpret a
+ * sparse vector. A user who is sure this vector is dense, or otherwise
+ *       is able to ensure that a lambda_reference will only be requested
+ *       at elements where nonzeroes already exist, may refer to
+ *       Vector::operator[].
+ *
+ * @return A lambda reference to the element \a i of this vector.
+ *
+ * \par Example.
+ * See alp::eWiseLambda() for a practical and useful example.
+ *
+ * \warning There is no similar concept in the official GraphBLAS specs.
+ *
+ * @see lambda_reference For more details on the returned reference type.
+ * @see alp::eWiseLambda For one legal way in which to use the returned
+ * #lambda_reference.
+ */
+ template< class Monoid >
+ lambda_reference operator()( const size_t i, const Monoid & monoid = Monoid() ) {
+ (void)i;
+ (void)monoid;
+ return PANIC;
+ }
+
+ /**
+ * Returns a lambda reference to an element of this vector. The user
+ * ensures that the requested reference only corresponds to a pre-existing
+ * nonzero in this vector, or undefined behaviour will occur.
+ *
+ * A lambda reference to an element of this vector is only valid when used
+ * inside a lambda function evaluated via alp::eWiseLambda. The lambda
+ * function is called for specific indices only-- that is, the GraphBLAS
+ * implementation decides at which elements to dereference this container.
+ * Outside this scope the returned reference incurs undefined behaviour.
+ *
+ * \warning In particular, for the given index \a i by the lambda function,
+ * it shall be \em illegal to refer to indices relative to that
+ * \a i; including, but not limited to, \f$ i+1 \f$, \f$ i-1 \f$, et
+ * cetera.
+ *
+ * \note As a consequence, this function cannot be used to perform stencil
+ * or halo based operations.
+ *
+ * If a previously non-existing entry of the vector is requested, undefined
+ * behaviour will occur. Functions that are defined to work with references
+ * of this kind, such as alp::eWiseLambda, define exactly which elements are
+ * dereferenced.
+ *
+ * \warning In parallel contexts the use of a returned lambda reference
+ * outside the context of an eWiseLambda will incur at least one of
+ * the following ill effects: it may
+ * -# fail outright,
+ * -# work on stale data,
+ * -# work on incorrect data, or
+ * -# incur high communication costs to guarantee correctness.
+ * In short, such usage causes undefined behaviour. Implementers are
+ * \em not advised to provide GAS-like functionality through this
+ * interface, as it invites bad programming practices and bad
+ * algorithm design decisions. This operator is instead intended to
+ * provide for generic BLAS1-type operations only.
+ *
+ * \note For I/O, use the iterator retrieved via cbegin() instead of
+ * relying on a lambda_reference.
+ *
+ * @param[in] i Which element to return a lambda reference of.
+ * @param[in] ring Under which generalised semiring to interpret the
+ * requested \f$ i \f$th element of this vector.
+ *
+ * \note The \a ring is required to be able to interpret a sparse vector. A
+ * user who is sure this vector is dense, or otherwise is able to
+ *       ensure that a lambda_reference will only be requested at
+ *       elements where nonzeroes already exist, may refer to
+ *       Vector::operator[].
+ *
+ * @return A lambda reference to the element \a i of this vector.
+ *
+ * \par Example.
+ * See alp::eWiseLambda() for a practical and useful example.
+ *
+ * \warning There is no similar concept in the official GraphBLAS specs.
+ *
+ * @see lambda_reference For more details on the returned reference type.
+ * @see alp::eWiseLambda For one legal way in which to use the returned
+ * #lambda_reference.
+ */
+ lambda_reference operator[]( const size_t i ) {
+ (void)i;
+#ifndef _ALP_NO_EXCEPTIONS
+ assert( false ); // Requesting lambda reference of unimplemented Vector backend.
+#endif
+ }
+ }; // class Vector
+ } // namespace internal
+
+ template<
+ typename T,
+ typename Structure,
+ enum Density density,
+ typename View,
+ typename ImfR,
+ typename ImfC,
+ enum Backend backend
+ >
+ class Vector;
+
+}
+
+#endif // _H_ALP_VECTOR_BASE
diff --git a/include/alp/blas0.hpp b/include/alp/blas0.hpp
new file mode 100644
index 000000000..d69148f93
--- /dev/null
+++ b/include/alp/blas0.hpp
@@ -0,0 +1,34 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 5th of December 2016
+ */
+
+#ifndef _H_ALP_BLAS0
+#define _H_ALP_BLAS0
+
+// now include all specialisations contained in the backend directories:
+#ifdef _ALP_WITH_REFERENCE
+ #include <alp/reference/blas0.hpp>
+#endif
+#ifdef _ALP_WITH_DISPATCH
+ #include <alp/dispatch/blas0.hpp>
+#endif
+
+#endif // end ``_H_ALP_BLAS0''
diff --git a/include/alp/blas1.hpp b/include/alp/blas1.hpp
new file mode 100644
index 000000000..1d3a72b34
--- /dev/null
+++ b/include/alp/blas1.hpp
@@ -0,0 +1,34 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 29th of March 2017
+ */
+
+#ifndef _H_ALP_BLAS1
+#define _H_ALP_BLAS1
+
+#ifdef _ALP_WITH_REFERENCE
+ #include <alp/reference/blas1.hpp>
+#endif
+#ifdef _ALP_WITH_DISPATCH
+ #include <alp/dispatch/blas1.hpp>
+#endif
+
+#endif // end ``_H_ALP_BLAS1''
+
diff --git a/include/alp/blas2.hpp b/include/alp/blas2.hpp
new file mode 100644
index 000000000..740b134a9
--- /dev/null
+++ b/include/alp/blas2.hpp
@@ -0,0 +1,39 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ *
+ * Defines the GraphBLAS level 2 API.
+ *
+ * @author A. N. Yzelman
+ * @date: 30th of March 2017.
+ */
+
+#ifndef _H_ALP_BLAS2
+#define _H_ALP_BLAS2
+
+// now include all specialisations contained in the backend directories:
+#ifdef _ALP_WITH_REFERENCE
+ #include <alp/reference/blas2.hpp>
+#endif
+#ifdef _ALP_WITH_DISPATCH
+ #include <alp/dispatch/blas2.hpp>
+#endif
+
+#endif // end ``_H_ALP_BLAS2''
+
diff --git a/include/alp/blas3.hpp b/include/alp/blas3.hpp
new file mode 100644
index 000000000..95b6ac5c4
--- /dev/null
+++ b/include/alp/blas3.hpp
@@ -0,0 +1,37 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author: A. N. Yzelman
+ */
+
+#ifndef _H_ALP_BLAS3
+#define _H_ALP_BLAS3
+
+// now include all specialisations contained in the backend directories:
+#ifdef _ALP_WITH_REFERENCE
+ #include <alp/reference/blas3.hpp>
+#endif
+#ifdef _ALP_WITH_OMP
+ #include <alp/omp/blas3.hpp>
+#endif
+#ifdef _ALP_WITH_DISPATCH
+ #include <alp/dispatch/blas3.hpp>
+#endif
+
+#endif // end _H_ALP_BLAS3
+
diff --git a/include/alp/config.hpp b/include/alp/config.hpp
new file mode 100644
index 000000000..3913ee6da
--- /dev/null
+++ b/include/alp/config.hpp
@@ -0,0 +1,35 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 8th of August, 2016
+ */
+
+#ifndef _H_ALP_CONFIG
+#define _H_ALP_CONFIG
+
+// include all active configurations
+#ifdef _ALP_WITH_REFERENCE
+ #include "alp/reference/config.hpp"
+#endif
+#ifdef _ALP_WITH_OMP
+ #include "alp/omp/config.hpp"
+#endif
+
+#endif // end ``_H_ALP_CONFIG''
+
diff --git a/include/alp/density.hpp b/include/alp/density.hpp
new file mode 100644
index 000000000..aee7f1a18
--- /dev/null
+++ b/include/alp/density.hpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *
+ * @file
+ *
+ * This file registers the enum that allows a user to specify the density of a
+ * given ALP container.
+ *
+ */
+
+#ifndef _H_ALP_DENSITY
+#define _H_ALP_DENSITY
+
+
+namespace alp {
+
+ /**
+ * Specifies whether an ALP container is dense or sparse.
+ *
+ * This is specified by the user and may be used by a backend to drive
+ * a choice of a storage scheme.
+ *
+ */
+ enum Density {
+ /**
+		 * Dense containers do not allow unassigned elements.
+ *
+ * Depending on the container's \a Structure, the backend may decide to
+ * not store all the elements. For example, an upper triangular matrix
+ * can be stored without the all-zero part below the diagonal.
+ *
+ * @see Structure
+ *
+ */
+ Dense,
+ /**
+		 * Sparse containers may have unassigned (zero) elements.
+ *
+ * The backend can decide which specific format to use.
+ *
+ */
+ Sparse
+ }; // enum Density
+
+} // namespace alp
+
+#endif // _H_ALP_DENSITY
diff --git a/include/alp/descriptors.hpp b/include/alp/descriptors.hpp
new file mode 100644
index 000000000..d9a84b7f8
--- /dev/null
+++ b/include/alp/descriptors.hpp
@@ -0,0 +1,208 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * @file
+ *
+ * Defines the GraphBLAS various descriptors.
+ *
+ * @author A. N. Yzelman
+ * @date 15 March, 2016
+ */
+
+#ifndef _H_ALP_DESCRIPTOR
+#define _H_ALP_DESCRIPTOR
+
+#include <string>
+
+
+namespace alp {
+
+ /**
+ * Descriptors indicate pre- or post-processing for some or all of the
+ * arguments to a GraphBLAS call.
+ *
+ * They can be combined using bit-wise operators. For instance, to both
+ * indicate the matrix needs be transposed and the mask needs be
+ * inverted, the following descriptor can be passed:
+ * transpose_matrix | invert_mask
+ */
+ typedef unsigned int Descriptor;
+
+ /** Collection of standard descriptors. */
+ namespace descriptors {
+
+ /**
+ * Indicates no additional pre- or post-processing on any of
+ * the GraphBLAS function arguments.
+ */
+ static constexpr Descriptor no_operation = 0;
+
+ /** Inverts the mask prior to applying it. */
+ static constexpr Descriptor invert_mask = 1;
+
+ /**
+ * Transposes the input matrix prior to applying it.
+ */
+ static constexpr Descriptor transpose_matrix = 2;
+
+ /**
+ * For data ingestion methods, such as grb::buildVector or grb::buildMatrix,
+ * this descriptor indicates that the input shall not contain any duplicate
+ * entries.
+ *
+ * Use of this descriptor will speed up the corresponding function call
+ * significantly.
+ *
+ * A call to buildMatrix with this descriptor set will pass its arguments to
+ * buildMatrixUnique.
+ *
+ * \warning Use of this descriptor while the data to be ingested actually
+ * \em does contain duplicates will lead to undefined behaviour.
+ *
+ * Currently, the reference implementation only supports ingesting data
+ * using this descriptor. Support for duplicate input is not yet
+ * implemented everywhere.
+ */
+ static constexpr Descriptor no_duplicates = 4;
+
+ /**
+ * Uses the structure of a mask vector only.
+ *
+ * This ignores the actual values of the mask argument. The i-th element of
+ * the mask now evaluates true if the mask has \em any value assigned to its
+ * i-th index, regardless of how that value evaluates. It evaluates false
+ * if there was no value assigned.
+ *
+ * @see structural_complement
+ */
+ static constexpr Descriptor structural = 8;
+
+ /**
+ * Uses the structural complement of a mask vector.
+ *
+ * This is a convenience short-hand for:
+ * \code
+ * constexpr Descriptor structural_complement = structural | invert_mask;
+ * \endcode
+ *
+ * This ignores the actual values of the mask argument. The i-th element of
+ * the mask now evaluates true if the mask has \em no value assigned to its
+ * i-th index, and evaluates false otherwise.
+ */
+ static constexpr Descriptor structural_complement = structural | invert_mask;
+
+ /**
+ * Indicates all vectors used in a computation are dense. This is a hint that
+ * might affect performance but will never affect the semantics of the
+ * computation.
+ */
+ static constexpr Descriptor dense = 16;
+
+ /**
+ * For any call to a matrix computation, the input matrix \a A is instead
+ * interpreted as \f$ A+I \f$, with \a I the identity matrix of dimension
+ * matching \a A. If \a A is not square, padding zero columns or rows will
+ * be added to \a I in the largest dimension.
+ */
+ static constexpr Descriptor add_identity = 32;
+
+ /**
+ * Instead of using input vector elements, use the index of those elements.
+ *
+		 * Indices are cast from their internal data type (e.g., \a size_t)
+ * to the relevant domain of the operator used.
+ */
+ static constexpr Descriptor use_index = 64;
+
+ /**
+ * Disallows the standard casting of input parameters to a compatible domain
+ * in case they did not match exactly.
+ *
+ * Setting this descriptor will yield compile-time errors whenever casting
+ * would have been necessary to successfully compile the requested graphBLAS
+ * operation.
+ *
+ * \warning It is illegal to perform conditional toggling on this descriptor.
+ *
+ * \note With conditional toggling, if descr is a descriptor, we
+ * mean if( descr & descriptors::no_casting ) {
+ * new_descr = desc - descriptors::no_casting
+ * //followed by any use of this new descriptor
+ * }
+ *
+ * The reason we cannot allow for this type of toggling is because this
+ * descriptor makes use of the static_assert C++11 function,
+ * which is checked regardless of the result of the if-statement. Thus
+ * the above code actually always throws compile errors on mismatching
+ * domains, no matter the original value in descr .
+ *
+ * \internal Simply making this descriptor the one with the largest integral
+ * value amongst the various descriptors is enough to guarantee
+ * nothing bad will happen. A notable exception are underflows,
+ * which are caught by using internal::MAX_DESCRIPTOR_VALUE.
+ */
+ static constexpr Descriptor no_casting = 256;
+
+ /**
+ * Computation shall proceed with zeros (according to the current semiring)
+ * propagating throughout the requested computation.
+ *
+ * \warning This may lead to unexpected results if the same output container
+ * is interpreted under a different semiring-- what is zero for the current
+ * semiring may not be zero for another. In other words: the concept of
+ * sparsity will no longer generalise to other semirings.
+ */
+ static constexpr Descriptor explicit_zero = 512;
+
+ /**
+ * Indicates overlapping input and output vectors is intentional and safe, due
+ * to, for example, the use of masks.
+ */
+ static constexpr Descriptor safe_overlap = 1024;
+
+ /**
+ * For operations involving 2 matrices, transposes the left-hand side input
+ * matrix prior to applying it.
+ */
+ static constexpr Descriptor transpose_left = 2048;
+
+ /**
+ * For operations involving 2 matrices, transposes the right-hand side input
+ * matrix prior to applying it.
+ */
+ static constexpr Descriptor transpose_right = 4096;
+
+ /**
+ * Translates a descriptor into a string.
+ *
+ * @param[in] descr The input descriptor.
+ *
+ * @returns A detailed English description.
+ */
+ std::string toString( const Descriptor descr );
+
+ } // namespace descriptors
+
+ namespace internal {
+ /** A descriptor cannot have a higher value than the below. */
+ static constexpr Descriptor MAX_DESCRIPTOR_VALUE = 8191;
+ } // namespace internal
+
+} // namespace alp
+
+#endif
diff --git a/include/alp/dispatch/blas0.hpp b/include/alp/dispatch/blas0.hpp
new file mode 100644
index 000000000..0a6c41309
--- /dev/null
+++ b/include/alp/dispatch/blas0.hpp
@@ -0,0 +1,80 @@
+
+/*
+ * Copyright 2021 Huawei Technologies Co., Ltd.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @author A. N. Yzelman
+ * @date 14th of January 2022
+ */
+
+#ifndef _H_ALP_DISPATCH_BLAS0
+#define _H_ALP_DISPATCH_BLAS0
+
+#include <type_traits> // std::enable_if, std::is_same
+
+#include
+#include
+#include
+#include