Skip to content

Commit 4a212e3

Browse files
authored
Merge pull request #235 from bonachea/network-conduits
Support distributed-memory (network) configurations
2 parents 43699ed + 7c3b666 commit 4a212e3

File tree

7 files changed

+171
-29
lines changed

7 files changed

+171
-29
lines changed

.github/workflows/build.yml

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,45 +8,70 @@ defaults:
88

99
jobs:
1010
build:
11-
name: ${{ matrix.compiler }}-${{ matrix.version }} (${{ matrix.os }})
11+
name: ${{ matrix.compiler }}-${{ matrix.version }} ${{ matrix.network }} (${{ matrix.os }})
1212
runs-on: ${{ matrix.os }}
1313
strategy:
1414
fail-fast: false
1515
matrix:
1616
os: [macos-13, macos-14, macos-15, ubuntu-24.04]
1717
compiler: [ gfortran ]
1818
version: [ 13, 14 ]
19+
network: [ smp ]
1920
exclude:
2021
- os: macos-15 # gcc-13 broken on macos-15
22+
compiler: gfortran
2123
version: 13
24+
network: smp
2225
include:
26+
# --- flang coverage ---
2327
- os: macos-13
2428
compiler: flang
2529
version: 20
30+
network: smp
2631
- os: macos-14
2732
compiler: flang
2833
version: 20
34+
network: smp
2935
- os: macos-15
3036
compiler: flang
3137
version: 20
38+
network: smp
3239
- os: ubuntu-24.04
3340
compiler: flang
3441
version: 20
42+
network: smp
3543
# https://hub.docker.com/r/phhargrove/llvm-flang/tags
3644
container: phhargrove/llvm-flang:20.1.0-1
37-
SUBJOB_PREFIX: GASNET_PSHM_NODES=2
3845
- os: ubuntu-24.04
3946
compiler: flang
4047
version: 19
48+
network: smp
4149
FFLAGS: -mmlir -allow-assumed-rank
4250
# https://hub.docker.com/r/phhargrove/llvm-flang/tags
4351
container: phhargrove/llvm-flang:19.1.1-1
44-
SUBJOB_PREFIX: GASNET_PSHM_NODES=2
4552
# - os: ubuntu-24.04
4653
# compiler: flang
4754
# version: new
55+
# network: smp
4856
# container: gmao/llvm-flang:latest
49-
# SUBJOB_PREFIX: GASNET_PSHM_NODES=2
57+
# --- udp coverage for selected configs ---
58+
- os: macos-15
59+
compiler: gfortran
60+
version: 14
61+
network: udp
62+
- os: ubuntu-24.04
63+
compiler: gfortran
64+
version: 14
65+
network: udp
66+
- os: macos-15
67+
compiler: flang
68+
version: 20
69+
network: udp
70+
- os: ubuntu-24.04
71+
compiler: flang
72+
version: 20
73+
network: udp
74+
container: phhargrove/llvm-flang:20.1.0-1
5075

5176
container:
5277
image: ${{ matrix.container }}
@@ -56,8 +81,9 @@ jobs:
5681
FFLAGS: ${{ matrix.FFLAGS }}
5782
PREFIX: install
5883
GASNET_CONFIGURE_ARGS: --enable-rpath --enable-debug
59-
GASNET_PSHM_NODES: 8
60-
SUBJOB_PREFIX: ${{ matrix.SUBJOB_PREFIX }}
84+
GASNET_SPAWN_VERBOSE: 1
85+
GASNET_SPAWNFN: L
86+
CAF_IMAGES: 8
6187

6288
steps:
6389
- name: Set gfortran variables
@@ -76,6 +102,18 @@ jobs:
76102
echo "CC=clang" >> "$GITHUB_ENV"
77103
echo "CXX=clang++" >> "$GITHUB_ENV"
78104
105+
- name: Set Caffeine variables
106+
run: |
107+
set -x
108+
# docker instances cannot handle high levels of subjob parallelism
109+
if test -n "${{ matrix.container }}"; then \
110+
echo "SUBJOB_PREFIX=CAF_IMAGES=2" >> "$GITHUB_ENV" ; \
111+
fi
112+
# disable shared-memory bypass with network=udp to simulate multi-node runs
113+
if test "${{ matrix.network }}" = "udp"; then \
114+
echo "GASNET_SUPERNODE_MAXSIZE=1" >> "$GITHUB_ENV" ; \
115+
fi
116+
79117
- name: Checkout code
80118
uses: actions/checkout@v1
81119

@@ -116,9 +154,10 @@ jobs:
116154
if: ${{ contains(matrix.os, 'macos') && matrix.os != 'macos-13' }}
117155
run: |
118156
set -x
119-
curl --retry 5 -LOsS https://github.com/fortran-lang/fpm/releases/download/v0.11.0/fpm-0.11.0.F90
157+
export FPM_VERSION=0.12.0
158+
curl --retry 5 -LOsS https://github.com/fortran-lang/fpm/releases/download/v$FPM_VERSION/fpm-$FPM_VERSION.F90
120159
mkdir fpm-temp
121-
gfortran-14 -o fpm-temp/fpm fpm-0.11.0.F90
160+
gfortran-14 -o fpm-temp/fpm fpm-$FPM_VERSION.F90
122161
echo "PATH=${PWD}/fpm-temp:${PATH}" >> "$GITHUB_ENV"
123162
124163
- name: Version info
@@ -137,11 +176,11 @@ jobs:
137176
for var in FC CC CXX FFLAGS CPPFLAGS CFLAGS LDFLAGS LIBS GASNET_CONFIGURE_ARGS ; do \
138177
eval echo "$var=\$$var"; done
139178
set -x
140-
./install.sh --prefix=${PREFIX} --verbose
179+
./install.sh --prefix=${PREFIX} --network=${{ matrix.network }} --verbose
141180
142181
- name: Run examples
143182
run: |
144-
echo GASNET_PSHM_NODES=${GASNET_PSHM_NODES}
183+
echo CAF_IMAGES=${CAF_IMAGES}
145184
set -x
146185
./build/run-fpm.sh run --verbose --example hello
147186
./build/run-fpm.sh run --verbose --example stop_with_no_code
@@ -151,10 +190,10 @@ jobs:
151190
- name: Run unit tests
152191
run: |
153192
echo SUBJOB_PREFIX=${SUBJOB_PREFIX}
154-
while (( GASNET_PSHM_NODES > 0 )); do \
155-
echo GASNET_PSHM_NODES=${GASNET_PSHM_NODES} ; \
193+
while (( CAF_IMAGES > 0 )); do \
194+
echo CAF_IMAGES=${CAF_IMAGES} ; \
156195
( set -x ; ./build/run-fpm.sh test --verbose -- -d ) ; \
157196
sleep 1 ; \
158-
GASNET_PSHM_NODES=$(( GASNET_PSHM_NODES / 2 )) ; \
197+
CAF_IMAGES=$(( CAF_IMAGES / 2 )) ; \
159198
done
160199

CONTRIBUTING.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ If you encounter problems or limitations when installing or using Caffeine, plea
1010
2. Email the Caffeine [mailing list](mailto:[email protected]) for advice.
1111
3. If steps 1 or 2 do not resolve the problem, please file a [new issue] including
1212
- [ ] The Fortran compiler and compiler version used with Caffeine,
13-
- [ ] The complete output of the build command,
13+
- [ ] The complete output of the install and build commands run with the `--verbose` argument,
1414
- [ ] The Caffeine version number or commit hash,
1515
- [ ] Any conditions required to reproduce the problem such as
1616
- [ ] The output of `uname -a` showing the operating system (OS), OS version, and processor architecture,
17-
- [ ] The number of images executed (e.g., the output of `echo $GASNET_PSHM_NODES`),
17+
- [ ] The number of images executed (e.g., the output of `echo $CAF_IMAGES`),
1818
- [ ] The command used to run your program (e.g., `./build/run-fpm.sh run`), and
1919
- [ ] A minimal reproducer: if possible, fewer than 50 lines demonstrating an issue.
2020

README.md

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -88,14 +88,25 @@ Caffeine leverages the following non-parallel features of Fortran to simplify th
8888

8989
Download, build, and run an example
9090
-----------------------------------
91+
Here is an outline of the basic commands used to build Caffeine and run an example:
92+
9193
```
9294
git clone https://github.com/BerkeleyLab/caffeine.git
9395
cd caffeine
94-
./install.sh
95-
export GASNET_PSHM_NODES=8
96-
FC=<Fortran-compiler-path> CC=<C-compiler-path> CXX=<C++-compiler-path> ./build/run-fpm.sh run --example hello
96+
env FC=<Fortran-compiler> CC=<C-compiler> CXX=<C++-compiler> ./install.sh <options>
97+
env CAF_IMAGES=8 ./build/run-fpm.sh run --example hello
9798
```
9899

100+
The provided compilers MUST be "compatible": for the best experience, we
101+
HIGHLY recommend specifying the language frontends provided by a single version
102+
of a given compiler suite installation. The C++ compiler is optional for
103+
single-node deployments (and can be disabled using command-line option `--without-cxx`),
104+
but C++ is required for some network backends.
105+
106+
The `install.sh` script recognizes a number of command-line options and environment variables to
107+
customize behavior for your system. See the output of `./install.sh --help` for full documentation.
108+
109+
99110
Example Usage
100111
-------------
101112
The Caffeine parallel runtime is intended as an embedded compilation target
@@ -115,6 +126,31 @@ Run tests
115126
./build/run-fpm.sh test
116127
```
117128

129+
Recognized Environment Variables
130+
--------------------------------
131+
132+
The following environment variables control the execution of the `fpm`-driven Caffeine unit test suite:
133+
134+
* `CAF_IMAGES`: integer that indicates the number of images to run
135+
* `SUBJOB_PREFIX`: command prefix to use for recursive `fpm` invocations in the test suite.
136+
Set `SUBJOB_PREFIX=skip` to disable such invocations (recommended for distributed-memory systems).
137+
138+
The following environment variables control the behavior of the Caffeine library:
139+
140+
* `CAF_HEAP_SIZE=128MB`: set the size of the shared-memory heap used for coarray storage, defaults to 128 MiB
141+
* `CAF_COMP_FRAC=0.10`: set the fraction of the shared-memory heap reserved for non-symmetric allocation, defaults to 10%
142+
143+
Caffeine is built atop the [GASNet-EX] exascale networking middleware, which has its own
144+
set of environment variable knobs to control network-level behavior.
145+
Here are *a few* of the most useful GASNet knobs:
146+
147+
* `GASNET_VERBOSEENV=1`: enable console output of all the envvar settings affecting GASNet operation
148+
* `GASNET_SPAWN_VERBOSE=1`: enable verbose console output of parallel job-spawning steps
149+
* `GASNET_BACKTRACE=1`: enable automatic backtrace upon fatal errors
150+
* `GASNET_SSH_SERVERS="host1 host2"`: space-delimited list of hostnames for distributed-memory job launch using the ssh-spawner
151+
152+
See [GASNet documentation](https://gasnet.lbl.gov/dist-ex/README) for full details on all settings.
153+
118154
Implementation Status
119155
--------------------
120156

install.sh

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,13 @@ USAGE:
1212
1313
--help Display this help text
1414
--prefix=PREFIX Install library into 'PREFIX' directory
15+
--network=<NET> Build Caffeine to target given GASNet network conduit.
16+
<NET> should be one of:
17+
smp: single-node shared-memory conduit (default)
18+
udp: portable UDP/IP (for Ethernet networks)
19+
ibv: InfiniBand IB Verbs
20+
ofi: OpenFabrics Interfaces
21+
ucx: Unified Communication X
1522
--prereqs Display a list of prerequisite software.
1623
Default prefix='\$HOME/.local/bin'
1724
--verbose Show verbose build commands
@@ -43,6 +50,7 @@ EOF
4350
GCC_VERSION=${GCC_VERSION:=14}
4451
GASNET_VERSION="stable"
4552
VERBOSE=""
53+
GASNET_CONDUIT="${GASNET_CONDUIT:-smp}"
4654

4755
list_prerequisites()
4856
{
@@ -82,6 +90,16 @@ while [ "$1" != "" ]; do
8290
--prefix)
8391
PREFIX=$VALUE
8492
;;
93+
--network)
94+
GASNET_CONDUIT=$(tr '[:upper:]' '[:lower:]' <<< $VALUE)
95+
case $GASNET_CONDUIT in
96+
smp|udp|mpi|ibv|ofi|ucx) ;;
97+
*)
98+
echo "ERROR: Unrecognized --network=$GASNET_CONDUIT"
99+
print_usage_info
100+
exit 1
101+
esac
102+
;;
85103
--verbose)
86104
VERBOSE="--verbose"
87105
set -x
@@ -304,8 +322,6 @@ EOF
304322
printf "Is it ok to download and install $1? [yes] "
305323
}
306324

307-
# TODO: Expand this to other GASNet conduits (issue #66)
308-
GASNET_CONDUIT=smp
309325
pkg="gasnet-$GASNET_CONDUIT-seq"
310326
export PKG_CONFIG_PATH
311327

@@ -337,8 +353,8 @@ if ! $PKG_CONFIG $pkg ; then
337353
cmd="$cmd --enable-$GASNET_CONDUIT"
338354
cmd="$cmd --enable-seq --disable-par --disable-parsync"
339355
cmd="$cmd --disable-segment-everything"
340-
# TEMPORARY: disable MPI compatibility until Caffeine supports distributed conduits
341-
cmd="$cmd --without-mpicc"
356+
# TEMPORARY: disable MPI compatibility until we figure out how to support it in fpm
357+
cmd="$cmd --disable-mpi-compat"
342358
eval $cmd
343359
$MAKE -j 8 all
344360
$MAKE -j 8 install
@@ -366,6 +382,7 @@ GASNET_CPPFLAGS="`$PKG_CONFIG $pkg --variable=GASNET_CPPFLAGS`"
366382
# in the directory path, and assumes that the first directory returned
367383
# by pkg-config contains the GASNet lib directory
368384
GASNET_LIBDIR="$(echo $GASNET_LIBS | awk '{print $1};')"
385+
GASNET_LIBDIR=${GASNET_LIBDIR#-L}
369386
case "$GASNET_LIBDIR" in
370387
*spack* )
371388
cat << EOF
@@ -378,7 +395,12 @@ EOF
378395
exit 1
379396
;;
380397
* )
381-
;; # Do nothing otherwise
398+
GASNET_PREFIX=$(dirname $GASNET_LIBDIR)
399+
if [ ! -r "$GASNET_PREFIX/include/gasnetex.h" ] ; then
400+
echo "ERROR: Failed to detect GASNet install prefix from $GASNET_LIBS"
401+
exit 1
402+
fi
403+
;;
382404
esac
383405

384406
# Strip compiler flags
@@ -395,6 +417,9 @@ echo "# DO NOT EDIT OR COMMIT -- Created by caffeine/install.sh" > build/fpm.tom
395417
cp manifest/fpm.toml.template build/fpm.toml
396418
GASNET_LIB_LOCATIONS=`echo $GASNET_LIBS | awk '{locs=""; for(i = 1; i <= NF; i++) if ($i ~ /^-L/) {locs=(locs " " $i);}; print locs; }'`
397419
GASNET_LIB_NAMES=`echo $GASNET_LIBS | awk '{names=""; for(i = 1; i <= NF; i++) if ($i ~ /^-l/) {names=(names " " $i);}; print names; }' | sed 's/-l//g'`
420+
if [[ $GASNET_CONDUIT == "udp" ]] ; then
421+
GASNET_LIB_NAMES+=" stdc++" # udp-conduit requires C++ libraries
422+
fi
398423
FPM_TOML_LINK_ENTRY="link = [\"$(echo ${GASNET_LIB_NAMES} | sed 's/ /", "/g')\"]"
399424
echo "${FPM_TOML_LINK_ENTRY}" >> build/fpm.toml
400425
ln -f -s build/fpm.toml
@@ -428,9 +453,30 @@ if ! [[ "$user_compiler_flags " =~ -[DU]ASSERTIONS[=\ ] ]] ; then
428453
compiler_flag+=" -DASSERTIONS"
429454
fi
430455

456+
GASNET_CONDUIT_UPPER=$(tr '[:lower:]' '[:upper:]' <<<$GASNET_CONDUIT)
457+
compiler_flag+=" -DCAF_NETWORK_$GASNET_CONDUIT_UPPER"
458+
431459
# Should come last to allow command-line overrides
432460
compiler_flag+=" $user_compiler_flags"
433461

462+
case $GASNET_CONDUIT in
463+
ibv|ofi|ucx)
464+
GASNET_RUNNER_ARG="${GASNET_RUNNER_ARG:-$GASNET_PREFIX/bin/gasnetrun_$GASNET_CONDUIT -n \${CAF_IMAGES:-2}}"
465+
;;
466+
udp)
467+
GASNET_RUNNER_ARG="${GASNET_RUNNER_ARG:-$GASNET_PREFIX/bin/amudprun -n \${CAF_IMAGES:-2}}"
468+
;;
469+
mpi)
470+
GASNET_RUNNER_ARG="${GASNET_RUNNER_ARG:-mpirun -n \${CAF_IMAGES:-2}}"
471+
;;
472+
smp)
473+
GASNET_RUNNER_ARG="${GASNET_RUNNER_ARG:-env GASNET_PSHM_NODES=\${CAF_IMAGES:-\${GASNET_PSHM_NODES:-}}}"
474+
;;
475+
*)
476+
GASNET_RUNNER_ARG="${GASNET_RUNNER_ARG:-}"
477+
;;
478+
esac
479+
434480
RUN_FPM_SH="build/run-fpm.sh"
435481
cat << EOF > $RUN_FPM_SH
436482
#!/bin/sh
@@ -441,6 +487,10 @@ if echo "--help -help --version -version --list -list new update list clean publ
441487
set -x
442488
exec \$fpm "\$fpm_sub_cmd" "\$@"
443489
elif echo "build test run install" | grep -w -q -e "\$fpm_sub_cmd" ; then
490+
sed -i.bak 's/^link = .*\$/$FPM_TOML_LINK_ENTRY/' build/fpm.toml
491+
if test -n "$GASNET_RUNNER_ARG" && echo "test run" | grep -w -q -e "\$fpm_sub_cmd" ; then
492+
set -- "--runner=$GASNET_RUNNER_ARG" "\$@"
493+
fi
444494
set -x
445495
exec \$fpm "\$fpm_sub_cmd" \\
446496
--profile debug \\

src/caffeine/unit_test_parameters_m.F90

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ function subjob_setup() result(result_)
3838

3939
call prif_sync_all()
4040
call prif_this_image_no_coarray(this_image=me)
41-
result_ = (me == 1)
41+
result_ = (me == 1) .and. (subjob_prefix /= "skip")
4242
end function
4343

4444

0 commit comments

Comments
 (0)