Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
7512416
refactor: prepare MD runner and ML buffers
Audrey-777 May 30, 2026
1a608fd
refactor: extract MD statistics state helpers
Audrey-777 May 30, 2026
a099505
refactor: introduce MD test fixtures
Audrey-777 May 30, 2026
6b545c7
docs: record MD pre-parallel refactor
Audrey-777 May 30, 2026
ca3dcb1
optimize: add OpenMP to MD base loops and NEP interface
Audrey-777 May 31, 2026
19117e0
docs: record OpenMP NEP and MD base changes
Audrey-777 May 31, 2026
5f60d47
docs: add MD OpenMP planning and benchmark results
Audrey-777 Jun 2, 2026
e7bfb0c
optimize: add OpenMP to Verlet thermalize, MSST rescale, and NHC part…
yyya18 Jun 3, 2026
79cf84a
optimize: add OpenMP to DPMD interface - coord fill & force copy back
yyya18 Jun 3, 2026
6c8551f
docs: add optimization records for Verlet, MSST, NHC, and DPMD OpenMP…
yyya18 Jun 3, 2026
e999653
merge: integrate refactor/parallel-optimize (DPMD+thermostat OpenMP) …
lijianing-sudo Jun 4, 2026
546a9c6
optimize: add OpenMP to remaining MD per-atom loops (rescale_vel, MSS…
lijianing-sudo Jun 4, 2026
56538dc
fix: move 'if' clause from '#pragma omp for' to '#pragma omp parallel'
lijianing-sudo Jun 4, 2026
acb660e
merge remote-tracking branch 'upstream/develop' into refactor/paralle…
yyya18 Jun 24, 2026
422f312
Merge team branch, use OpenMP version from team
yyya18 Jun 24, 2026
d01a033
refactor(md): add OpenMP parallelization for LJ virial, thermostats, …
yyya18 Jun 26, 2026
2cd0857
chore: remove planning docs from branch
yyya18 Jun 26, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
611 changes: 611 additions & 0 deletions Test/openmp_nep_basic_benchmark.cpp

Large diffs are not rendered by default.

37 changes: 37 additions & 0 deletions Test/results/openmp_nep_basic_benchmark.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
kernel,threads,serial_ms,omp_ms,speedup,efficiency,max_abs_diff,checksum
md_update_pos,1,13.007151,13.052355,0.996537,0.996537,0.000000e+00,6.693102e-02
md_update_vel,1,14.545481,14.496280,1.003394,1.003394,0.000000e+00,1.811520e+00
md_kinetic_energy,1,6.925990,6.857271,1.010021,1.010021,0.000000e+00,1.205301e+02
md_temp_vector,1,8.115525,8.224651,0.986732,0.986732,0.000000e+00,7.061666e+02
md_force_copy,1,9.308984,9.119837,1.020740,1.020740,0.000000e+00,4.932209e+00
nep_coord_fill,1,10.322396,10.129846,1.019008,1.019008,0.000000e+00,6.562763e+03
nep_energy_sum,1,3.037125,3.027364,1.003224,1.003224,0.000000e+00,5.979382e+04
nep_force_fill,1,8.900827,8.746296,1.017668,1.017668,0.000000e+00,2.851195e+00
nep_virial_sum,1,28.520833,12.451097,2.290628,2.290628,0.000000e+00,4.036082e+04
md_update_pos,2,13.098582,6.573173,1.992733,0.996367,0.000000e+00,6.693102e-02
md_update_vel,2,14.820314,7.287271,2.033726,1.016863,0.000000e+00,1.811520e+00
md_kinetic_energy,2,7.014427,3.325272,2.109430,1.054715,2.036415e-10,1.205301e+02
md_temp_vector,2,8.208209,4.038653,2.032413,1.016206,1.600000e-10,7.061666e+02
md_force_copy,2,9.362898,4.795636,1.952379,0.976190,0.000000e+00,4.932209e+00
nep_coord_fill,2,10.446854,5.126669,2.037747,1.018874,0.000000e+00,6.562763e+03
nep_energy_sum,2,3.059902,1.525828,2.005405,1.002702,8.811185e-09,5.979382e+04
nep_force_fill,2,8.925261,4.459652,2.001336,1.000668,0.000000e+00,2.851195e+00
nep_virial_sum,2,28.700600,6.623542,4.333120,2.166560,9.997166e-09,4.036082e+04
md_update_pos,4,13.473068,3.496484,3.853319,0.963330,0.000000e+00,6.693102e-02
md_update_vel,4,14.817903,3.852381,3.846427,0.961607,0.000000e+00,1.811520e+00
md_kinetic_energy,4,7.070885,1.874085,3.772980,0.943245,2.242899e-10,1.205301e+02
md_temp_vector,4,8.508072,2.182533,3.898256,0.974564,1.489155e-10,7.061666e+02
md_force_copy,4,9.537895,2.563152,3.721158,0.930290,0.000000e+00,4.932209e+00
nep_coord_fill,4,10.574139,2.837437,3.726652,0.931663,0.000000e+00,6.562763e+03
nep_energy_sum,4,3.115476,0.785601,3.965723,0.991431,2.277375e-08,5.979382e+04
nep_force_fill,4,9.159472,2.378972,3.850181,0.962545,0.000000e+00,2.851195e+00
nep_virial_sum,4,28.671869,3.517298,8.151675,2.037919,6.323717e-09,4.036082e+04
md_update_pos,8,13.427898,1.823981,7.361864,0.920233,0.000000e+00,6.693102e-02
md_update_vel,8,14.830449,2.062811,7.189436,0.898679,0.000000e+00,1.811520e+00
md_kinetic_energy,8,7.156275,1.025259,6.979966,0.872496,1.775788e-10,1.205301e+02
md_temp_vector,8,8.650072,1.172442,7.377825,0.922228,1.680860e-10,7.061666e+02
md_force_copy,8,9.885551,1.377531,7.176284,0.897036,0.000000e+00,4.932209e+00
nep_coord_fill,8,11.209954,1.566864,7.154390,0.894299,0.000000e+00,6.562763e+03
nep_energy_sum,8,3.211079,0.399830,8.031107,1.003888,1.861918e-08,5.979382e+04
nep_force_fill,8,9.512588,1.360189,6.993579,0.874197,0.000000e+00,2.851195e+00
nep_virial_sum,8,28.884186,2.028955,14.235989,1.779499,8.529241e-09,4.036082e+04
10 changes: 10 additions & 0 deletions Test/results/run_1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
kernel,threads,serial_ms,omp_ms,speedup,efficiency,max_abs_diff,checksum
md_update_pos,1,13.007151,13.052355,0.996537,0.996537,0.000000e+00,6.693102e-02
md_update_vel,1,14.545481,14.496280,1.003394,1.003394,0.000000e+00,1.811520e+00
md_kinetic_energy,1,6.925990,6.857271,1.010021,1.010021,0.000000e+00,1.205301e+02
md_temp_vector,1,8.115525,8.224651,0.986732,0.986732,0.000000e+00,7.061666e+02
md_force_copy,1,9.308984,9.119837,1.020740,1.020740,0.000000e+00,4.932209e+00
nep_coord_fill,1,10.322396,10.129846,1.019008,1.019008,0.000000e+00,6.562763e+03
nep_energy_sum,1,3.037125,3.027364,1.003224,1.003224,0.000000e+00,5.979382e+04
nep_force_fill,1,8.900827,8.746296,1.017668,1.017668,0.000000e+00,2.851195e+00
nep_virial_sum,1,28.520833,12.451097,2.290628,2.290628,0.000000e+00,4.036082e+04
10 changes: 10 additions & 0 deletions Test/results/run_2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
kernel,threads,serial_ms,omp_ms,speedup,efficiency,max_abs_diff,checksum
md_update_pos,2,13.098582,6.573173,1.992733,0.996367,0.000000e+00,6.693102e-02
md_update_vel,2,14.820314,7.287271,2.033726,1.016863,0.000000e+00,1.811520e+00
md_kinetic_energy,2,7.014427,3.325272,2.109430,1.054715,2.036415e-10,1.205301e+02
md_temp_vector,2,8.208209,4.038653,2.032413,1.016206,1.600000e-10,7.061666e+02
md_force_copy,2,9.362898,4.795636,1.952379,0.976190,0.000000e+00,4.932209e+00
nep_coord_fill,2,10.446854,5.126669,2.037747,1.018874,0.000000e+00,6.562763e+03
nep_energy_sum,2,3.059902,1.525828,2.005405,1.002702,8.811185e-09,5.979382e+04
nep_force_fill,2,8.925261,4.459652,2.001336,1.000668,0.000000e+00,2.851195e+00
nep_virial_sum,2,28.700600,6.623542,4.333120,2.166560,9.997166e-09,4.036082e+04
10 changes: 10 additions & 0 deletions Test/results/run_4.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
kernel,threads,serial_ms,omp_ms,speedup,efficiency,max_abs_diff,checksum
md_update_pos,4,13.473068,3.496484,3.853319,0.963330,0.000000e+00,6.693102e-02
md_update_vel,4,14.817903,3.852381,3.846427,0.961607,0.000000e+00,1.811520e+00
md_kinetic_energy,4,7.070885,1.874085,3.772980,0.943245,2.242899e-10,1.205301e+02
md_temp_vector,4,8.508072,2.182533,3.898256,0.974564,1.489155e-10,7.061666e+02
md_force_copy,4,9.537895,2.563152,3.721158,0.930290,0.000000e+00,4.932209e+00
nep_coord_fill,4,10.574139,2.837437,3.726652,0.931663,0.000000e+00,6.562763e+03
nep_energy_sum,4,3.115476,0.785601,3.965723,0.991431,2.277375e-08,5.979382e+04
nep_force_fill,4,9.159472,2.378972,3.850181,0.962545,0.000000e+00,2.851195e+00
nep_virial_sum,4,28.671869,3.517298,8.151675,2.037919,6.323717e-09,4.036082e+04
10 changes: 10 additions & 0 deletions Test/results/run_8.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
kernel,threads,serial_ms,omp_ms,speedup,efficiency,max_abs_diff,checksum
md_update_pos,8,13.427898,1.823981,7.361864,0.920233,0.000000e+00,6.693102e-02
md_update_vel,8,14.830449,2.062811,7.189436,0.898679,0.000000e+00,1.811520e+00
md_kinetic_energy,8,7.156275,1.025259,6.979966,0.872496,1.775788e-10,1.205301e+02
md_temp_vector,8,8.650072,1.172442,7.377825,0.922228,1.680860e-10,7.061666e+02
md_force_copy,8,9.885551,1.377531,7.176284,0.897036,0.000000e+00,4.932209e+00
nep_coord_fill,8,11.209954,1.566864,7.154390,0.894299,0.000000e+00,6.562763e+03
nep_energy_sum,8,3.211079,0.399830,8.031107,1.003888,1.861918e-08,5.979382e+04
nep_force_fill,8,9.512588,1.360189,6.993579,0.874197,0.000000e+00,2.851195e+00
nep_virial_sum,8,28.884186,2.028955,14.235989,1.779499,8.529241e-09,4.036082e+04
41 changes: 41 additions & 0 deletions Test/run_openmp_nep_basic_benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env bash
#
# Build the OpenMP NEP/MD basic-kernel benchmark and run it once per thread
# count, merging the per-run CSV files into a single result table.
#
# Environment overrides (all optional):
#   NATOMS   value passed to the benchmark's --natoms option (default: 2000000)
#   REPEAT   value passed to the benchmark's --repeat option (default: 5)
#   CXX      C++ compiler executable                         (default: g++)
#   THREADS  space-separated thread counts to run            (default: "1 2 4 8")
#   OMP_PROC_BIND / OMP_PLACES  respected if already set; otherwise
#            "close" / "cores" are exported for every run.
#
# Outputs (relative to this script's directory):
#   build/openmp_nep_basic_benchmark            compiled binary
#   results/run_<threads>.csv                   raw CSV of each run
#   results/openmp_nep_basic_benchmark.csv      merged CSV with one header row
#   results/openmp_nep_basic_benchmark.log      compiler, settings and progress
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
BUILD_DIR="${SCRIPT_DIR}/build"
RESULT_DIR="${SCRIPT_DIR}/results"
BIN="${BUILD_DIR}/openmp_nep_basic_benchmark"
CSV="${RESULT_DIR}/openmp_nep_basic_benchmark.csv"
LOG="${RESULT_DIR}/openmp_nep_basic_benchmark.log"

NATOMS="${NATOMS:-2000000}"
REPEAT="${REPEAT:-5}"
CXX="${CXX:-g++}"
THREADS="${THREADS:-1 2 4 8}"

# Single source of truth for the compile flags, so the logged command line can
# never drift from the one that is actually executed.
BENCH_FLAGS=(-O3 -std=c++17 -fopenmp)

# Split the thread list on whitespace; refuse an empty list up front because a
# run with no thread counts would leave an empty CSV that looks like a result.
read -r -a thread_counts <<< "${THREADS}"
if (( ${#thread_counts[@]} == 0 )); then
    echo "error: THREADS must list at least one thread count" >&2
    exit 1
fi

mkdir -p "${BUILD_DIR}" "${RESULT_DIR}"

# Start a fresh log that records the build environment.
{
    echo "Compiler: $("${CXX}" --version | head -n 1)"
    echo "NATOMS=${NATOMS}"
    echo "REPEAT=${REPEAT}"
    echo "THREADS=${thread_counts[*]}"
    echo "Build: ${CXX} ${BENCH_FLAGS[*]}"
} > "${LOG}"

# pipefail (set above) makes a compile error abort the script even though the
# compiler output is piped through tee.
"${CXX}" "${BENCH_FLAGS[@]}" "${SCRIPT_DIR}/openmp_nep_basic_benchmark.cpp" -o "${BIN}" 2>&1 | tee -a "${LOG}"

# Thread placement does not depend on the thread count, so export it once.
export OMP_PROC_BIND="${OMP_PROC_BIND:-close}"
export OMP_PLACES="${OMP_PLACES:-cores}"

: > "${CSV}"
header_written=0
for threads in "${thread_counts[@]}"; do
    echo "Running with OMP_NUM_THREADS=${threads}" | tee -a "${LOG}"
    export OMP_NUM_THREADS="${threads}"
    tmp_csv="${RESULT_DIR}/run_${threads}.csv"
    "${BIN}" --threads "${threads}" --natoms "${NATOMS}" --repeat "${REPEAT}" > "${tmp_csv}"
    if (( header_written == 0 )); then
        # The first run contributes the header row plus its data rows.
        cat "${tmp_csv}" >> "${CSV}"
        header_written=1
    else
        # Later runs contribute data rows only (skip their header line).
        tail -n +2 "${tmp_csv}" >> "${CSV}"
    fi
done

echo "CSV: ${CSV}" | tee -a "${LOG}"
76 changes: 47 additions & 29 deletions source/source_esolver/esolver_dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ void ESolver_DP::before_all_runners(UnitCell& ucell, const Input_para& inp)
dp_potential = 0;
dp_force.create(ucell.nat, 3);
dp_virial.create(3, 3);
dp_cell.resize(9);
dp_coord.resize(3 * ucell.nat);
dp_model_force.clear();
dp_model_virial.clear();

ModuleIO::CifParser::write(PARAM.globalv.global_out_dir + "STRU.cif",
ucell,
Expand All @@ -44,6 +48,20 @@ void ESolver_DP::before_all_runners(UnitCell& ucell, const Input_para& inp)

atype.resize(ucell.nat);

// Build flat atom index for OpenMP coordinate fill in runner()
atom_type_index.resize(ucell.nat);
atom_local_index.resize(ucell.nat);
int iat = 0;
for (int it = 0; it < ucell.ntype; ++it)
{
for (int ia = 0; ia < ucell.atoms[it].na; ++ia)
{
atom_type_index[iat] = it;
atom_local_index[iat] = ia;
iat++;
}
}

rescaling = inp.mdp.dp_rescaling;
fparam = inp.mdp.dp_fparam;
aparam = inp.mdp.dp_aparam;
Expand All @@ -59,38 +77,36 @@ void ESolver_DP::runner(UnitCell& ucell, const int istep)
ModuleBase::TITLE("ESolver_DP", "runner");
ModuleBase::timer::start("ESolver_DP", "runner");

std::vector<double> cell(9, 0.0);
cell[0] = ucell.latvec.e11 * ucell.lat0_angstrom;
cell[1] = ucell.latvec.e12 * ucell.lat0_angstrom;
cell[2] = ucell.latvec.e13 * ucell.lat0_angstrom;
cell[3] = ucell.latvec.e21 * ucell.lat0_angstrom;
cell[4] = ucell.latvec.e22 * ucell.lat0_angstrom;
cell[5] = ucell.latvec.e23 * ucell.lat0_angstrom;
cell[6] = ucell.latvec.e31 * ucell.lat0_angstrom;
cell[7] = ucell.latvec.e32 * ucell.lat0_angstrom;
cell[8] = ucell.latvec.e33 * ucell.lat0_angstrom;

std::vector<double> coord(3 * ucell.nat, 0.0);
int iat = 0;
for (int it = 0; it < ucell.ntype; ++it)
dp_cell[0] = ucell.latvec.e11 * ucell.lat0_angstrom;
dp_cell[1] = ucell.latvec.e12 * ucell.lat0_angstrom;
dp_cell[2] = ucell.latvec.e13 * ucell.lat0_angstrom;
dp_cell[3] = ucell.latvec.e21 * ucell.lat0_angstrom;
dp_cell[4] = ucell.latvec.e22 * ucell.lat0_angstrom;
dp_cell[5] = ucell.latvec.e23 * ucell.lat0_angstrom;
dp_cell[6] = ucell.latvec.e31 * ucell.lat0_angstrom;
dp_cell[7] = ucell.latvec.e32 * ucell.lat0_angstrom;
dp_cell[8] = ucell.latvec.e33 * ucell.lat0_angstrom;

dp_coord.resize(3 * ucell.nat);
const int nat = ucell.nat;
#pragma omp parallel for schedule(static) if (nat >= 256)
for (int iat = 0; iat < nat; ++iat)
{
for (int ia = 0; ia < ucell.atoms[it].na; ++ia)
{
coord[3 * iat] = ucell.atoms[it].tau[ia].x * ucell.lat0_angstrom;
coord[3 * iat + 1] = ucell.atoms[it].tau[ia].y * ucell.lat0_angstrom;
coord[3 * iat + 2] = ucell.atoms[it].tau[ia].z * ucell.lat0_angstrom;
iat++;
}
const int it = atom_type_index[iat];
const int ia = atom_local_index[iat];
dp_coord[3 * iat] = ucell.atoms[it].tau[ia].x * ucell.lat0_angstrom;
dp_coord[3 * iat + 1] = ucell.atoms[it].tau[ia].y * ucell.lat0_angstrom;
dp_coord[3 * iat + 2] = ucell.atoms[it].tau[ia].z * ucell.lat0_angstrom;
}
assert(ucell.nat == iat);

#ifdef __DPMD
std::vector<double> f, v;
dp_potential = 0;
dp_force.zero_out();
dp_virial.zero_out();
dp_model_force.clear();
dp_model_virial.clear();

dp.compute(dp_potential, f, v, coord, atype, cell, fparam, aparam);
dp.compute(dp_potential, dp_model_force, dp_model_virial, dp_coord, atype, dp_cell, fparam, aparam);

// rescale the energy, force, and stress
const double fact_e = rescaling / ModuleBase::Ry_to_eV;
Expand All @@ -101,18 +117,20 @@ void ESolver_DP::runner(UnitCell& ucell, const int istep)
GlobalV::ofs_running << " #TOTAL ENERGY# " << std::setprecision(11) << dp_potential * ModuleBase::Ry_to_eV << " eV"
<< std::endl;

for (int i = 0; i < ucell.nat; ++i)
const int nat_f = ucell.nat;
#pragma omp parallel for schedule(static) if (nat_f >= 256)
for (int i = 0; i < nat_f; ++i)
{
dp_force(i, 0) = f[3 * i] * fact_f;
dp_force(i, 1) = f[3 * i + 1] * fact_f;
dp_force(i, 2) = f[3 * i + 2] * fact_f;
dp_force(i, 0) = dp_model_force[3 * i] * fact_f;
dp_force(i, 1) = dp_model_force[3 * i + 1] * fact_f;
dp_force(i, 2) = dp_model_force[3 * i + 2] * fact_f;
}

for (int i = 0; i < 3; ++i)
{
for (int j = 0; j < 3; ++j)
{
dp_virial(i, j) = v[3 * i + j] * fact_v;
dp_virial(i, j) = dp_model_virial[3 * i + j] * fact_v;
}
}
#else
Expand Down
6 changes: 6 additions & 0 deletions source/source_esolver/esolver_dp.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,12 +109,18 @@ class ESolver_DP : public ESolver

std::string dp_file; ///< directory of DP model file
std::vector<int> atype = {}; ///< atom type corresponding to DP model
std::vector<int> atom_type_index; ///< type index (it) for each global atom iat
std::vector<int> atom_local_index; ///< local index (ia) within type for each global atom iat
std::vector<double> fparam = {}; ///< frame parameter for dp potential: dim_fparam
std::vector<double> aparam = {}; ///< atomic parameter for dp potential: natoms x dim_aparam
double rescaling = 1.0; ///< rescaling factor for DP model
double dp_potential = 0.0; ///< computed potential energy
ModuleBase::matrix dp_force; ///< computed atomic forces
ModuleBase::matrix dp_virial; ///< computed lattice virials
std::vector<double> dp_cell; ///< DP cell buffer in Angstrom
std::vector<double> dp_coord; ///< DP coordinate buffer in Angstrom
std::vector<double> dp_model_force; ///< raw force buffer returned by DP
std::vector<double> dp_model_virial; ///< raw virial buffer returned by DP
};

} // namespace ModuleESolver
Expand Down
Loading
Loading