# This code supplements the white paper
#    "Cluster-Level Tuning
#     of a Shallow Water Equation Solver
#     on the Intel MIC Architecture"
#     (2014, Andrey Vladimirov and Cliff Addison),
# available at the following URL:
#     http://research.colfaxinternational.com/post/2014/05/12/Shallow-Water.aspx
# You are free to use, modify and distribute this code as long as you acknowledge
# the above mentioned publication.

# MPI wrapper around the Fortran compiler; used by every compile rule below
FC := mpiifort

# Flags handed to the compiler for both the CPU and the MIC build
FFLAGS := -vec-report1 -O3 -openmp

# Executable names. The .MIC suffix marks the build for the manycore architecture.
TARGET_CPU := shallow_ompmpi_V7
TARGET_MIC := shallow_ompmpi_V7.MIC
# Everything that "all" builds and "clean" removes.
# Left recursively expanded so it always tracks the two names above.
TARGET = $(TARGET_CPU) $(TARGET_MIC)

# Problem size, N (passed twice on the command line of the executable)
RUN_N := 5000

# Tuning parameter H (see paper), chosen separately per architecture
RUN_H_CPU := 1
RUN_H_MIC := 2

# OpenMP threads per MPI process (exported to the run as OMP_NUM_THREADS).
# This is a tuning parameter of the calculation.
RUN_THREADS_CPU := 2
RUN_THREADS_MIC := 6

# MPI processes per CPU node / per MIC coprocessor.
# For optimal bandwidth utilization, RUN_THREADS_CPU*RUN_PERNODE_CPU (and
# likewise RUN_THREADS_MIC*RUN_PERNODE_MIC) must equal the number of physical
# cores of the CPU node (respectively of the coprocessor).
RUN_PERNODE_CPU := 12
RUN_PERNODE_MIC := 10

# Build the manycore (.MIC) executable from the Fortran source that shares its
# base name; identical to the CPU build except for the extra -mmic flag.
# $< is the single .f prerequisite, $@ is the executable being produced.
%.MIC: %.f
	$(FC) $(FFLAGS) -mmic $< -o $@

# Build a CPU executable straight from the Fortran source of the same name
# (compile and link in one step). $< is the .f file, $@ the executable.
%: %.f
	$(FC) $(FFLAGS) $< -o $@

# Default goal: build both the CPU and the MIC executables, then print a hint
# on how to launch a calculation.
# "all" is a command name, not a file, so it is declared phony; a stray file
# called "all" in this directory can then never be mistaken for the target.
.PHONY: all
all: $(TARGET)
	$(info To run the calculation, use command "make run-cpu" or "make run-mic")

# Run "make run-cpu" to launch a calculation on the CPU.
# Change "localhost" to the hostname(s) of the compute nodes according to the
# configuration of your cluster.
#
# Notes:
#  - Declared phony: this target is a command, so it must run every time even
#    if a file named "run-cpu" happens to exist.
#  - Only the CPU executable is a prerequisite, so a CPU run does not require
#    the MIC binary to be buildable on this machine.
#  - The executable path uses $(CURDIR), which make itself sets to the working
#    directory (also after "make -C dir"); $(PWD) is merely inherited from the
#    environment and may be unset or stale.
#  - The recipe (re)writes hosts-cpu.txt on every run with one line of the
#    form "host:processes-per-node", which is then given to mpirun as the
#    machine file.
.PHONY: run-cpu
run-cpu: $(TARGET_CPU)
	$(info Running a problem with N=$(RUN_N) and H=$(RUN_H_CPU) on the CPU using P=$(RUN_PERNODE_CPU) processes per node and T=$(RUN_THREADS_CPU) threads per process.)
	$(info Modify the Makefile to change the values of tuning parameters and to modify the list of compute nodes)
	echo "localhost:$(RUN_PERNODE_CPU)" > hosts-cpu.txt
	mpirun -machinefile hosts-cpu.txt -env "OMP_NUM_THREADS=$(RUN_THREADS_CPU)" $(CURDIR)/$(TARGET_CPU) $(RUN_N) $(RUN_N) $(RUN_H_CPU)

# Run "make run-mic" to launch a calculation on the MIC architecture
# (Xeon Phi coprocessor).
# Change "mic0" to the full hostname(s) of the Xeon Phi coprocessors according
# to the configuration of your cluster.
#
# Notes:
#  - Declared phony: this target is a command, so it must run every time even
#    if a file named "run-mic" happens to exist.
#  - Only the MIC executable is a prerequisite; the CPU binary is not needed
#    for a coprocessor run.
#  - The executable path uses $(CURDIR), which make itself sets to the working
#    directory (also after "make -C dir"); $(PWD) is merely inherited from the
#    environment and may be unset or stale.
#  - MIC_LD_LIBRARY_PATH is not defined in this Makefile; it is taken from the
#    environment (presumably set by the Intel compiler setup scripts - confirm
#    for your installation) and forwarded to the run as LD_LIBRARY_PATH.
#  - The recipe (re)writes hosts-mic.txt on every run with one line of the
#    form "host:processes-per-coprocessor", used as the mpirun machine file.
.PHONY: run-mic
run-mic: $(TARGET_MIC)
	$(info Running a problem with N=$(RUN_N) and H=$(RUN_H_MIC) on the MIC architecture using P=$(RUN_PERNODE_MIC) processes per coprocessor and T=$(RUN_THREADS_MIC) threads per process.)
	$(info Modify the Makefile to change the values of tuning parameters and to modify the list of compute nodes)
	echo "mic0:$(RUN_PERNODE_MIC)" > hosts-mic.txt
	I_MPI_MIC=1 mpirun -machinefile hosts-mic.txt -env "LD_LIBRARY_PATH=$(MIC_LD_LIBRARY_PATH)" -env "OMP_NUM_THREADS=$(RUN_THREADS_MIC)" $(CURDIR)/$(TARGET_MIC) $(RUN_N) $(RUN_N) $(RUN_H_MIC)


# Remove everything this Makefile generates: both executables and the
# machine files written by the run-cpu / run-mic recipes.
# Declared phony so that a file named "clean" cannot turn "make clean" into a
# silent no-op (the target has no prerequisites and would count as up to date).
.PHONY: clean
clean:
	rm -f $(TARGET) hosts-cpu.txt hosts-mic.txt
