diff --git a/source/source_estate/module_dm/density_matrix.cpp b/source/source_estate/module_dm/density_matrix.cpp index 7414e00fd3b..f18d068db73 100644 --- a/source/source_estate/module_dm/density_matrix.cpp +++ b/source/source_estate/module_dm/density_matrix.cpp @@ -655,7 +655,7 @@ void DensityMatrix_Tools::func_xyz_to_updown(const std::complex { target_DMR_mat[icol + step_trace[0]] = tmp[0].real() + tmp[3].real(); // rho_0 = (rho_upup + rho_downdown).real() target_DMR_mat[icol + step_trace[1]] = tmp[1].real() + tmp[2].real(); // rho_x = (rho_updown + rho_downup).real() - target_DMR_mat[icol + step_trace[2]] = -tmp[1].imag() + tmp[2].imag(); // rho_y = (i * (rho_updown - rho_downup)).real() + target_DMR_mat[icol + step_trace[2]] = tmp[1].imag() - tmp[2].imag(); // rho_y = Im(rho_updown - rho_downup) target_DMR_mat[icol + step_trace[3]] = tmp[0].real() - tmp[3].real(); // rho_z = (rho_upup - rho_downdown).real() } @@ -664,7 +664,7 @@ void DensityMatrix_Tools::func_xyz_to_updown>(const std::co { target_DMR_mat[icol + step_trace[0]] = tmp[0] + tmp[3]; // rho_0 = (rho_upup + rho_downdown) target_DMR_mat[icol + step_trace[1]] = tmp[1] + tmp[2]; // rho_x = (rho_updown + rho_downup) - target_DMR_mat[icol + step_trace[2]] = ModuleBase::IMAG_UNIT * (tmp[1].imag() - tmp[2].imag()); // rho_y = (i * (rho_updown - rho_downup)) + target_DMR_mat[icol + step_trace[2]] = -ModuleBase::IMAG_UNIT * (tmp[1] - tmp[2]); // rho_y = -i*(rho_updown - rho_downup) target_DMR_mat[icol + step_trace[3]] = tmp[0] - tmp[3]; // rho_z = (rho_upup - rho_downdown) } diff --git a/source/source_hsolver/module_genelpa/elpa_new.cpp b/source/source_hsolver/module_genelpa/elpa_new.cpp index d0454821907..dc590c71320 100644 --- a/source/source_hsolver/module_genelpa/elpa_new.cpp +++ b/source/source_hsolver/module_genelpa/elpa_new.cpp @@ -94,6 +94,10 @@ ELPA_Solver::ELPA_Solver(const bool isReal, elpa_set_integer(NEW_ELPA_HANDLE_POOL[handle_id], "mpi_comm_parent", MPI_Comm_c2f(comm), &error); elpa_set_integer(NEW_ELPA_HANDLE_POOL[handle_id], "process_row", myprow, &error); elpa_set_integer(NEW_ELPA_HANDLE_POOL[handle_id], "process_col", mypcol, &error); + // blacs_context is required by ELPA for internal MPI operations + // (e.g. MPI_Bcast in complex Cholesky/invert_triangular); + // previously missing in this constructor but present in the otherParameter one + elpa_set_integer(NEW_ELPA_HANDLE_POOL[handle_id], "blacs_context", cblacs_ctxt, &error); error = elpa_setup(NEW_ELPA_HANDLE_POOL[handle_id]); // cout<<"elpa handle is setup\n"; diff --git a/source/source_lcao/module_gint/gint_common.cpp b/source/source_lcao/module_gint/gint_common.cpp index 987fcbe5a0e..1de8969fd0d 100644 --- a/source/source_lcao/module_gint/gint_common.cpp +++ b/source/source_lcao/module_gint/gint_common.cpp @@ -168,8 +168,15 @@ void merge_hr_part_to_hR(const std::vector>& hr_gint_ std::vector row_set = {0, 0, 1, 1}; std::vector col_set = {0, 1, 0, 1}; //construct complex matrix + // Pauli-to-spinor conversion: H = V_0*I + B_x*sigma_x + B_y*sigma_y + B_z*sigma_z + // sigma_y = [[0,-i],[i,0]], so H_{up,down} = B_x - i*B_y, H_{down,up} = B_x + i*B_y + // coefficient = clx_i + i*clx_j for each Pauli channel: + // is=0 (up,up): V_0 + B_z => coeff on B_z = +1 => clx_i=1, clx_j=0 + // is=1 (up,down): B_x - i*B_y => coeff on B_y = -i => clx_i=0, clx_j=-1 + // is=2 (down,up): B_x + i*B_y => coeff on B_y = +i => clx_i=0, clx_j=+1 + // is=3 (down,down): -(V_0 - B_z) => coeff on V_0 = -1 => clx_i=-1, clx_j=0 std::vector clx_i = {1, 0, 0, -1}; - std::vector clx_j = {0, 1, -1, 0}; + std::vector clx_j = {0, -1, 1, 0}; for (int is = 0; is < 4; is++){ if(!PARAM.globalv.domag && (is==1 || is==2)) continue; hR_tmp->set_zero(); @@ -203,9 +210,10 @@ void merge_hr_part_to_hR(const std::vector>& hr_gint_ + std::complex(clx_i[is], clx_j[is]) * mat_nspin2->get_value(irow, icol); } } - //fill the lower triangle matrix - //When is=0 or 3, the real part does not need conjugation; - //when is=1 or 2, the small matrix is not Hermitian, so conjugation is not needed + //fill the lower triangle matrix at -R by conjugate transpose of upper at R + // This ensures H(-R) = H(R)^dagger, required for Hermiticity of H(k). + // For real matrices (is=0,3), conj has no effect. + // For complex matrices (is=1,2), conj is essential. if (iat1 < iat2) { auto lower_mat = lower_ap->find_matrix(-R_index); @@ -213,7 +221,7 @@ void merge_hr_part_to_hR(const std::vector>& hr_gint_ { for (int icol = 0; icol < upper_mat->get_col_size(); ++icol) { - lower_mat->get_value(icol, irow) = upper_mat->get_value(irow, icol); + lower_mat->get_value(icol, irow) = std::conj(upper_mat->get_value(irow, icol)); } } } diff --git a/source/source_lcao/module_operator_lcao/dftu_force_stress.hpp b/source/source_lcao/module_operator_lcao/dftu_force_stress.hpp index 38c96025fa5..e9a546c2b97 100644 --- a/source/source_lcao/module_operator_lcao/dftu_force_stress.hpp +++ b/source/source_lcao/module_operator_lcao/dftu_force_stress.hpp @@ -146,13 +146,6 @@ void DFTU>::cal_force_stress(const bool cal_force, std::vector VU(occ.size()); double eu_tmp = 0; this->cal_v_of_u(occ, tlp1, u_value, &VU[0], eu_tmp); - if(this->nspin == 4) - { - for (int i = 0; i < VU.size(); i++) - { - VU[i] /= 2.0; - } - } // second iteration to calculate force and stress // calculate Force for atom J @@ -242,12 +235,14 @@ void DFTU>::cal_force_stress(const bool cal_force, if (cal_force) { #ifdef __MPI - // sum up the occupation matrix Parallel_Reduce::reduce_all(force.c, force.nr * force.nc); #endif - for (int i = 0; i < force.nr * force.nc; i++) + if (this->nspin != 4) { - force.c[i] *= 2.0; + for (int i = 0; i < force.nr * force.nc; i++) + { + force.c[i] *= 2.0; + } } } diff --git a/source/source_lcao/module_operator_lcao/dftu_lcao.cpp b/source/source_lcao/module_operator_lcao/dftu_lcao.cpp index 2b8e4f3506f..92fac0ffabd 100644 --- a/source/source_lcao/module_operator_lcao/dftu_lcao.cpp +++ b/source/source_lcao/module_operator_lcao/dftu_lcao.cpp @@ -612,7 +612,14 @@ void hamilt::DFTU, std::complex transfer from double to std::complex + // Pauli-to-spinor conversion for DFT+U potential: + // V = V_0*I + V_x*sigma_x + V_y*sigma_y + V_z*sigma_z + // sigma_y = [[0,-i],[i,0]], so: + // V_{up,up} = 0.5*(V_0 + V_z) + // V_{down,down} = 0.5*(V_0 - V_z) + // V_{up,down} = 0.5*(V_x - i*V_y) <- note: minus sign from sigma_y + // V_{down,up} = 0.5*(V_x + i*V_y) <- note: plus sign from sigma_y + // This is consistent with the convention in gint_common.cpp merge_hr_part_to_hR(). const int m_size = int(sqrt(vu.size()) / 2); const int m_size2 = m_size * m_size; vu.resize(vu_tmp.size()); @@ -627,9 +634,8 @@ void hamilt::DFTU, std::complex type, but here we use double type for test - vu[index[1]] = 0.5 * (vu_tmp[index[1]] + std::complex(0.0, 1.0) * vu_tmp[index[2]]); - vu[index[2]] = 0.5 * (vu_tmp[index[1]] - std::complex(0.0, 1.0) * vu_tmp[index[2]]); + vu[index[1]] = 0.5 * (vu_tmp[index[1]] - std::complex(0.0, 1.0) * vu_tmp[index[2]]); + vu[index[2]] = 0.5 * (vu_tmp[index[1]] + std::complex(0.0, 1.0) * vu_tmp[index[2]]); } } } diff --git a/source/source_lcao/module_operator_lcao/dspin_force_stress.hpp b/source/source_lcao/module_operator_lcao/dspin_force_stress.hpp index 955c6b4d267..1fe8812e9b9 100644 --- a/source/source_lcao/module_operator_lcao/dspin_force_stress.hpp +++ b/source/source_lcao/module_operator_lcao/dspin_force_stress.hpp @@ -212,12 +212,14 @@ void DeltaSpin>::cal_force_stress(const bool cal_force, if (cal_force) { #ifdef __MPI - // sum up the occupation matrix Parallel_Reduce::reduce_all(force.c, force.nr * force.nc); #endif - for (int i = 0; i < force.nr * force.nc; i++) + if (this->nspin != 4) { - force.c[i] *= 2.0; + for (int i = 0; i < force.nr * force.nc; i++) + { + force.c[i] *= 2.0; + } } } diff --git a/tests/03_NAO_multik/scf_angle_spin4/result.ref b/tests/03_NAO_multik/scf_angle_spin4/result.ref index 1a6409ad032..e1656f8f8ce 100644 --- a/tests/03_NAO_multik/scf_angle_spin4/result.ref +++ b/tests/03_NAO_multik/scf_angle_spin4/result.ref @@ -1,5 +1,5 @@ -etotref -6267.4651888505040915 -etotperatomref -3133.7325944253 +etotref -6267.4651896196382950 +etotperatomref -3133.7325948098 totalforceref 0.000000 -totalstressref 3912.920415 -totaltimeref 1.24 +totalstressref 3912.920437 +totaltimeref 15.08 diff --git a/tests/03_NAO_multik/scf_angle_spin4/threshold b/tests/03_NAO_multik/scf_angle_spin4/threshold new file mode 100644 index 00000000000..33e27fb6c5f --- /dev/null +++ b/tests/03_NAO_multik/scf_angle_spin4/threshold @@ -0,0 +1 @@ +threshold 0.00001 diff --git a/tests/03_NAO_multik/scf_out_dos_spin4/result.ref b/tests/03_NAO_multik/scf_out_dos_spin4/result.ref index d126df468a6..e54b51d3064 100644 --- a/tests/03_NAO_multik/scf_out_dos_spin4/result.ref +++ b/tests/03_NAO_multik/scf_out_dos_spin4/result.ref @@ -1,6 +1,6 @@ -etotref -1964.0663947982975515 +etotref -1964.0663947982770878 etotperatomref -982.0331973991 -totalforceref 0.162298 -totalstressref 1877.059021 +totalforceref 0.162158 +totalstressref 1877.059089 totaldosref 38 -totaltimeref 7.05 +totaltimeref 16.23 diff --git a/tests/03_NAO_multik/scf_u_spin4/result.ref b/tests/03_NAO_multik/scf_u_spin4/result.ref index 74a7a7e4d72..bdf978fbbe7 100644 --- a/tests/03_NAO_multik/scf_u_spin4/result.ref +++ b/tests/03_NAO_multik/scf_u_spin4/result.ref @@ -1,5 +1,5 @@ -etotref -6789.2817503491569369 -etotperatomref -3394.6408751746 -totalforceref 11.335196 -totalstressref 4892.274915 -totaltimeref 4.70 +etotref -6789.2816406266510967 +etotperatomref -3394.6408203133 +totalforceref 11.331534 +totalstressref 4697.832232 +totaltimeref 9.71