diff --git a/source/module_esolver/esolver_ks_lcao_elec.cpp b/source/module_esolver/esolver_ks_lcao_elec.cpp index b1c080a445..884e0c33e5 100644 --- a/source/module_esolver/esolver_ks_lcao_elec.cpp +++ b/source/module_esolver/esolver_ks_lcao_elec.cpp @@ -453,6 +453,9 @@ void ESolver_KS_LCAO::others(const int istep) } else if (cal_type == "get_wf") { + // Note: on the removal of LOWF + // here the LOWF is involved by gathering the wave functions from 2D BCD + // to serial. Parameter list of begin() should be updated. IState_Envelope IEP(this->pelec); if (GlobalV::GAMMA_ONLY_LOCAL) { @@ -460,7 +463,7 @@ void ESolver_KS_LCAO::others(const int istep) this->pw_rho, this->pw_wfc, this->pw_big, - this->LOWF, + this->orb_con.ParaV, this->GG, INPUT.out_wfc_pw, this->wf.out_wfc_r, @@ -479,7 +482,7 @@ void ESolver_KS_LCAO::others(const int istep) this->pw_rho, this->pw_wfc, this->pw_big, - this->LOWF, + this->orb_con.ParaV, this->GK, INPUT.out_wfc_pw, this->wf.out_wfc_r, diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.cpp index 67b123c93a..c96ae67e0d 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_charge.cpp @@ -3,13 +3,13 @@ #include "module_base/blas_connector.h" #include "module_base/timer.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" - +#include "module_io/read_wfc_lcao.h" // Shen Yu add 2019/5/9 extern "C" { - void Cblacs_gridinfo(int icontxt, int *nprow, int *npcol, int *myprow, int *mypcol); - void Cblacs_pinfo(int *myid, int *nprocs); - void Cblacs_pcoord(int icontxt, int pnum, int *prow, int *pcol); + void Cblacs_gridinfo(int icontxt, int* nprow, int* npcol, int* myprow, int* mypcol); + void Cblacs_pinfo(int* myid, int* nprocs); + void Cblacs_pcoord(int icontxt, int pnum, int* prow, int* pcol); int Cblacs_pnum(int icontxt, int prow, int pcol); } @@ -58,32 +58,38 @@ Local_Orbital_Charge::~Local_Orbital_Charge() } void Local_Orbital_Charge::allocate_dm_wfc(const Grid_Technique& gt, - elecstate::ElecState* pelec, - Local_Orbital_wfc& lowf, - psi::Psi* psi, - const K_Vectors& kv, - const int& istep) + elecstate::ElecState* pelec, + Local_Orbital_wfc& lowf, + psi::Psi* psi, + const K_Vectors& kv, + const int& istep) { ModuleBase::TITLE("Local_Orbital_Charge", "allocate_dm_wfc"); this->LOWF = &lowf; this->LOWF->gridt = > // here we reset the density matrix dimension. + // it is weird the psi_gamma is allocated inside the function allocate_gamma + // but psi_k is allocated outside the function allocate_DM_k + // and why the allocation of psi and DM are done together??? + // lowf.gamma_file(psi, pelec); + // will be replaced by this->allocate_gamma(gt.lgd, psi, pelec, kv.get_nks(), istep); return; } -void Local_Orbital_Charge::allocate_dm_wfc(const Grid_Technique >, - elecstate::ElecState* pelec, - Local_Orbital_wfc& lowf, - psi::Psi>* psi, - const K_Vectors& kv, - const int& istep) +void Local_Orbital_Charge::allocate_dm_wfc(const Grid_Technique& gt, + elecstate::ElecState* pelec, + Local_Orbital_wfc& lowf, + psi::Psi>* psi, + const K_Vectors& kv, + const int& istep) { ModuleBase::TITLE("Local_Orbital_Charge", "allocate_dm_wfc"); this->LOWF = &lowf; this->LOWF->gridt = > // here we reset the density matrix dimension. + // lgd will be the dimension of global matrix and nbasis of psi to be of the local one lowf.allocate_k(gt.lgd, psi, pelec, kv.get_nks(), kv.get_nkstot(), kv.kvec_c, istep); this->allocate_DM_k(kv.get_nks(), gt.nnrg); return; diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.cpp b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.cpp index 08779b3608..60498c934a 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.cpp +++ b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.cpp @@ -236,96 +236,3 @@ int Local_Orbital_wfc::localIndex(int globalindex, int nblk, int nprocs, int& my myproc = int((globalindex % (nblk * nprocs)) / nblk); return int(globalindex / (nblk * nprocs)) * nblk + globalindex % nblk; } - -#ifdef __MPI -void Local_Orbital_wfc::wfc_2d_to_grid(const double* wfc_2d, - double** wfc_grid, - const int ik, - const ModuleBase::matrix& ekb, - const ModuleBase::matrix& wg) -{ - ModuleBase::TITLE(" Local_Orbital_wfc", "wfc_2d_to_grid"); - ModuleBase::timer::tick("Local_Orbital_wfc", "wfc_2d_to_grid"); - - const Parallel_Orbitals* pv = this->ParaV; - const int inc = 1; - int myid = 0; - MPI_Comm_rank(pv->comm_2D, &myid); - int info = 0; - - // calculate maxnloc for bcasting 2d-wfc - long maxnloc; // maximum number of elements in local matrix - info = MPI_Reduce(&pv->nloc_wfc, &maxnloc, 1, MPI_LONG, MPI_MAX, 0, pv->comm_2D); - info = MPI_Bcast(&maxnloc, 1, MPI_LONG, 0, pv->comm_2D); - std::vector work(maxnloc); // work/buffer matrix - - int naroc[2]; // maximum number of row or column - for (int iprow = 0; iprow < pv->dim0; ++iprow) - { - for (int ipcol = 0; ipcol < pv->dim1; ++ipcol) - { - const int coord[2] = {iprow, ipcol}; - int src_rank; - info = MPI_Cart_rank(pv->comm_2D, coord, &src_rank); - if (myid == src_rank) - { - BlasConnector::copy(pv->nloc_wfc, wfc_2d, inc, work.data(), inc); - naroc[0] = pv->nrow; - naroc[1] = pv->ncol_bands; - } - info = MPI_Bcast(naroc, 2, MPI_INT, src_rank, pv->comm_2D); - info = MPI_Bcast(work.data(), maxnloc, MPI_DOUBLE, src_rank, pv->comm_2D); - - info = this->set_wfc_grid(naroc, pv->nb, pv->dim0, pv->dim1, iprow, ipcol, work.data(), wfc_grid); - - } // loop ipcol - } // loop iprow - ModuleBase::timer::tick("Local_Orbital_wfc", "wfc_2d_to_grid"); -} - -void Local_Orbital_wfc::wfc_2d_to_grid(const std::complex* wfc_2d, - std::complex** wfc_grid, - int ik, - const ModuleBase::matrix& ekb, - const ModuleBase::matrix& wg, - const std::vector>& kvec_c) -{ - ModuleBase::TITLE("Local_Orbital_wfc", "wfc_2d_to_grid"); - ModuleBase::timer::tick("Local_Orbital_wfc", "wfc_2d_to_grid"); - - const Parallel_Orbitals* pv = this->ParaV; - const int inc = 1; - int myid = 0; - MPI_Comm_rank(pv->comm_2D, &myid); - int info = 0; - - // calculate maxnloc for bcasting 2d-wfc - long maxnloc = 0; // maximum number of elements in local matrix - info = MPI_Reduce(&pv->nloc_wfc, &maxnloc, 1, MPI_LONG, MPI_MAX, 0, pv->comm_2D); - info = MPI_Bcast(&maxnloc, 1, MPI_LONG, 0, pv->comm_2D); - std::vector> work(maxnloc); // work/buffer matrix - - int naroc[2] = {0}; // maximum number of row or column - for (int iprow = 0; iprow < pv->dim0; ++iprow) - { - for (int ipcol = 0; ipcol < pv->dim1; ++ipcol) - { - const int coord[2] = {iprow, ipcol}; - int src_rank; - info = MPI_Cart_rank(pv->comm_2D, coord, &src_rank); - if (myid == src_rank) - { - BlasConnector::copy(pv->nloc_wfc, wfc_2d, inc, work.data(), inc); - naroc[0] = pv->nrow; - naroc[1] = pv->ncol_bands; - } - info = MPI_Bcast(naroc, 2, MPI_INT, src_rank, pv->comm_2D); - info = MPI_Bcast(work.data(), maxnloc, MPI_DOUBLE_COMPLEX, src_rank, pv->comm_2D); - // mohan update 2021-02-12, delte BFIELD option - info = this->set_wfc_grid(naroc, pv->nb, pv->dim0, pv->dim1, iprow, ipcol, work.data(), wfc_grid); - } // loop ipcol - } // loop iprow - - ModuleBase::timer::tick("Local_Orbital_wfc", "wfc_2d_to_grid"); -} -#endif diff --git a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.h b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.h index b1a0b4a8b7..de35c20f4a 100644 --- a/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.h +++ b/source/module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.h @@ -10,22 +10,9 @@ class Local_Orbital_wfc { -public: - - Local_Orbital_wfc(); - ~Local_Orbital_wfc(); - // refactor new implementation: RAII - // a more look-looking name would be LocalOrbitalWfc, I suppose... - Local_Orbital_wfc(const int& nspin, - const int& nks, - const int& nbands, - const int& nlocal, - const int& gamma_only, - const int& nb2d, - const std::string& ks_solver, - const std::string& readin_dir); - // - void initialize(); + public: + Local_Orbital_wfc(); + ~Local_Orbital_wfc(); ///========================================= /// grid wfc /// used to generate density matrix: LOC.DM_R, @@ -45,12 +32,12 @@ class Local_Orbital_wfc /// read wavefunction coefficients: WFC_NAO_K/GAMMA*.txt void gamma_file(psi::Psi* psid, elecstate::ElecState* pelec); void allocate_k(const int& lgd, - psi::Psi>* psi, - elecstate::ElecState* pelec, - const int& nks, - const int& nkstot, - const std::vector>& kvec_c, - const int& istep); + psi::Psi>* psi, + elecstate::ElecState* pelec, + const int& nks, + const int& nkstot, + const std::vector>& kvec_c, + const int& istep); //========================================= // Init Cij, make it satisfy 2 conditions: @@ -65,96 +52,12 @@ class Local_Orbital_wfc static int globalIndex(int localindex, int nblk, int nprocs, int myproc); static int localIndex(int globalindex, int nblk, int nprocs, int& myproc); -#ifdef __MPI - ///========================================= - /// Parallel: convert the distribution of wavefunction from 2D to grid - ///========================================= - /// For gamma_only, T = double; - /// For multi-k, T = complex; - /// Set myid and ctot when output is needed; - /// Set wfc as nullptr when 2d-to-grid convertion is not needed. - - // Notice: here I reload this function rather than use template - // (in which the implementation should be put in header file ) - // because sub-function `write_wfc_nao_complex`contains GlobalC declared in `global.h` - // which will cause lots of "not defined" if included in a header file. - void wfc_2d_to_grid(const double* wfc_2d, - double** wfc_grid, - const int ik, - const ModuleBase::matrix& ekb, - const ModuleBase::matrix& wg); - void wfc_2d_to_grid(const std::complex* wfc_2d, - std::complex** wfc_grid, - int ik, - const ModuleBase::matrix& ekb, - const ModuleBase::matrix& wg, - const std::vector>& kvec_c); -#endif - int error = 0; private: - - template - int set_wfc_grid(int naroc[2], - int nb, - int dim0, - int dim1, - int iprow, - int ipcol, - T* work, - T** wfc, - int myid = -1, - T** ctot = nullptr); - bool wfck_flag; bool complex_flag; bool allocate_flag; int nks; }; - - -// the function should not be defined here!! mohan 2024-03-28 -template -int Local_Orbital_wfc::set_wfc_grid(int naroc[2], - int nb, - int dim0, - int dim1, - int iprow, - int ipcol, - T* work, - T** wfc, - int myid, - T** ctot) -{ -#ifdef __MPI - ModuleBase::TITLE(" Local_Orbital_wfc", "set_wfc_grid"); - if (!wfc && !ctot) - { - return 0; - } - for (int j = 0; j < naroc[1]; ++j) - { - int igcol = globalIndex(j, nb, dim1, ipcol); - if (igcol >= GlobalV::NBANDS) - { - continue; - } - for (int i = 0; i < naroc[0]; ++i) - { - int igrow = globalIndex(i, nb, dim0, iprow); - int mu_local = this->gridt->trace_lo[igrow]; - if (wfc && mu_local >= 0) - { - wfc[igcol][mu_local] = work[j * naroc[0] + i]; - } - if (ctot && myid == 0) - { - ctot[igcol][igrow] = work[j * naroc[0] + i]; - } - } - } -#endif - return 0; -} #endif diff --git a/source/module_io/istate_envelope.cpp b/source/module_io/istate_envelope.cpp index 6699232341..8cc83e8db9 100644 --- a/source/module_io/istate_envelope.cpp +++ b/source/module_io/istate_envelope.cpp @@ -2,14 +2,15 @@ #include "module_base/global_function.h" #include "module_base/global_variable.h" +#include "module_base/memory.h" +#include "module_base/timer.h" #include "module_hamilt_pw/hamilt_pwdft/global.h" #include "module_io/rho_io.h" #include "module_io/write_wfc_pw.h" #include "module_io/write_wfc_r.h" - -IState_Envelope::IState_Envelope(const elecstate::ElecState* pes_in) +IState_Envelope::IState_Envelope(const elecstate::ElecState* pes) { - pes_ = pes_in; + pes_ = pes; } IState_Envelope::~IState_Envelope() @@ -20,7 +21,7 @@ void IState_Envelope::begin(const psi::Psi* psid, const ModulePW::PW_Basis* rhopw, const ModulePW::PW_Basis_K* wfcpw, const ModulePW::PW_Basis_Big* bigpw, - Local_Orbital_wfc& lowf, + const Parallel_Orbitals& para_orb, Gint_Gamma& gg, int& out_wfc_pw, int& out_wfc_r, @@ -158,6 +159,10 @@ void IState_Envelope::begin(const psi::Psi* psid, wfc_gamma_grid[is][ib] = new double[gg.gridt->lgd]; } + const double mem_size = sizeof(double) * double(gg.gridt->lgd) * double(nbands) * double(nspin) / 1024.0 / 1024.0; + ModuleBase::Memory::record("IState_Envelope::begin::wfc_gamma_grid", mem_size); + printf(" Estimated on-the-fly memory consuming by IState_Envelope::begin::wfc_gamma_grid: %f MB\n", mem_size); + // for pw-wfc in G space psi::Psi> pw_wfc_g; @@ -170,15 +175,19 @@ void IState_Envelope::begin(const psi::Psi* psid, { if (bands_picked_[ib]) { - for (int is = 0; is < nspin; ++is) + for (int is = 0; is < nspin; ++is) // loop over spin { std::cout << " Perform envelope function for band " << ib + 1 << std::endl; ModuleBase::GlobalFunc::ZEROS(pes_->charge->rho[is], wfcpw->nrxx); psid->fix_k(is); #ifdef __MPI - lowf.wfc_2d_to_grid(psid->get_pointer(), wfc_gamma_grid[is], is, this->pes_->ekb, this->pes_->wg); + wfc_2d_to_grid(psid->get_pointer(), para_orb, wfc_gamma_grid[is], gg.gridt->trace_lo); #else + // if not MPI enabled, it is the case psid holds a global matrix. use fix_k to switch between different + // spin channels (actually kpoints, because now the same kpoint in different spin channels are treated + // as distinct kpoints) + for (int i = 0; i < nbands; ++i) { for (int j = 0; j < nlocal; ++j) @@ -242,7 +251,7 @@ void IState_Envelope::begin(const psi::Psi>* psi, const ModulePW::PW_Basis* rhopw, const ModulePW::PW_Basis_K* wfcpw, const ModulePW::PW_Basis_Big* bigpw, - Local_Orbital_wfc& lowf, + const Parallel_Orbitals& para_orb, Gint_k& gk, int& out_wf, int& out_wf_r, @@ -284,6 +293,7 @@ void IState_Envelope::begin(const psi::Psi>* psi, // if ucell is even, it's also correct. // +1.0e-8 in case like (2.999999999+1)/2 // if NSPIN=4, each band only one electron, fermi_band should be nelec + std::cout << " number of electrons = " << nelec << std::endl; fermi_band = nspin < 4 ? static_cast((nelec + 1) / 2 + 1.0e-8) : nelec; std::cout << " number of occupied bands = " << fermi_band << std::endl; @@ -372,6 +382,19 @@ void IState_Envelope::begin(const psi::Psi>* psi, // (2.3) output the charge density in .cub format. + // allocate grid wavefunction for gamma_only + std::vector**> wfc_k_grid(nspin); + for (int ik = 0; ik < kv.get_nks(); ++ik) + { + wfc_k_grid[ik] = new std::complex*[nbands]; + for (int ib = 0; ib < nbands; ++ib) + wfc_k_grid[ik][ib] = new std::complex[gk.gridt->lgd]; + } + const double mem_size = sizeof(std::complex) * double(gk.gridt->lgd) * double(nbands) * double(nspin) + * double(kv.get_nks()) / 1024.0 / 1024.0; + ModuleBase::Memory::record("IState_Envelope::begin::wfc_k_grid", mem_size); + printf(" Estimated on-the-fly memory consuming by IState_Envelope::begin::wfc_k_grid: %f MB\n", mem_size); + // for pw-wfc in G space psi::Psi> pw_wfc_g(kv.ngk.data()); @@ -388,37 +411,28 @@ void IState_Envelope::begin(const psi::Psi>* psi, for (int ik = 0; ik < kv.get_nks(); ++ik) // the loop of nspin0 is included { const int ispin = kv.isk[ik]; - ModuleBase::GlobalFunc::ZEROS(pes_->charge->rho[ispin], wfcpw->nrxx); + ModuleBase::GlobalFunc::ZEROS(pes_->charge->rho[ispin], + wfcpw->nrxx); // terrible, you make changes on another instance's data??? std::cout << " Perform envelope function for kpoint " << ik << ", band" << ib + 1 << std::endl; // 2d-to-grid conversion is unified into `wfc_2d_to_grid`. psi->fix_k(ik); -#ifdef __MPI - // need to deal with NSPIN=4 !!!! - lowf.wfc_2d_to_grid(psi->get_pointer(), - lowf.wfc_k_grid[ik], - ik, - this->pes_->ekb, - this->pes_->wg, - kv.kvec_c); +#ifdef __MPI // need to deal with NSPIN=4 !!!! + wfc_2d_to_grid(psi->get_pointer(), para_orb, wfc_k_grid[ik], gk.gridt->trace_lo); #else for (int i = 0; i < nbands; ++i) { for (int j = 0; j < nlocal; ++j) - lowf.wfc_k_grid[ik][i][j] = psi[0](i, j); + wfc_k_grid[ik][i][j] = psi[0](i, j); } #endif // deal with NSPIN=4 - gk.cal_env_k(ik, - lowf.wfc_k_grid[ik][ib], - pes_->charge->rho[ispin], - kv.kvec_c, - kv.kvec_d, - GlobalC::ucell); + gk.cal_env_k(ik, wfc_k_grid[ik][ib], pes_->charge->rho[ispin], kv.kvec_c, kv.kvec_d, GlobalC::ucell); std::stringstream ss; ss << global_out_dir << "BAND" << ib + 1 << "_k_" << ik / nspin0 + 1 << "_s_" << ispin + 1 << "_ENV.cube"; const double ef_tmp = this->pes_->eferm.get_efval(ispin); + ModuleIO::write_rho( #ifdef __MPI bigpw->bz, @@ -463,6 +477,13 @@ void IState_Envelope::begin(const psi::Psi>* psi, } } + for (int is = 0; is < nspin; ++is) + { + for (int ib = 0; ib < nbands; ++ib) + delete[] wfc_k_grid[is][ib]; + delete[] wfc_k_grid[is]; + } + return; } @@ -487,3 +508,159 @@ void IState_Envelope::set_pw_wfc(const ModulePW::PW_Basis_K* wfcpw, // call FFT wfcpw->real2recip(Porter.data(), &wfc_g(ib, 0), ik); } + +#ifdef __MPI +template +int IState_Envelope::set_wfc_grid(const int naroc[2], + const int nb, + const int dim0, + const int dim1, + const int iprow, + const int ipcol, + const T* in, + T** out, + const std::vector& trace_lo) +{ + ModuleBase::TITLE(" Local_Orbital_wfc", "set_wfc_grid"); + if (!out) + { + return 0; + } + for (int j = 0; j < naroc[1]; ++j) + { + int igcol = globalIndex(j, nb, dim1, ipcol); + if (igcol >= GlobalV::NBANDS) + { + continue; + } + for (int i = 0; i < naroc[0]; ++i) + { + int igrow = globalIndex(i, nb, dim0, iprow); + int mu_local = trace_lo[igrow]; + if (out && mu_local >= 0) + { + out[igcol][mu_local] = in[j * naroc[0] + i]; + } + } + } + return 0; +} + +template int IState_Envelope::set_wfc_grid(const int naroc[2], + const int nb, + const int dim0, + const int dim1, + const int iprow, + const int ipcol, + const double* in, + double** out, + const std::vector& trace_lo); +template int IState_Envelope::set_wfc_grid(const int naroc[2], + const int nb, + const int dim0, + const int dim1, + const int iprow, + const int ipcol, + const std::complex* in, + std::complex** out, + const std::vector& trace_lo); + +template +void IState_Envelope::wfc_2d_to_grid(const T* lowf_2d, + const Parallel_Orbitals& pv, + T** lowf_grid, + const std::vector& trace_lo) +{ + ModuleBase::TITLE(" Local_Orbital_wfc", "wfc_2d_to_grid"); + ModuleBase::timer::tick("Local_Orbital_wfc", "wfc_2d_to_grid"); + + // dimension related + const int nlocal = pv.desc_wfc[2]; + const int nbands = pv.desc_wfc[3]; + + // MPI and memory related + const int mem_stride = 1; + int mpi_info = 0; + auto mpi_dtype = std::is_same::value ? MPI_DOUBLE : MPI_DOUBLE_COMPLEX; + + // get the rank of the current process + int rank = 0; + MPI_Comm_rank(pv.comm_2D, &rank); + + // calculate the maximum number of nlocal over all processes in pv.comm_2D range + long buf_size; + mpi_info = MPI_Reduce(&pv.nloc_wfc, &buf_size, 1, MPI_LONG, MPI_MAX, 0, pv.comm_2D); + mpi_info = MPI_Bcast(&buf_size, 1, MPI_LONG, 0, pv.comm_2D); // get and then broadcast + std::vector lowf_block(buf_size); + + // this quantity seems to have the value returned by function numroc_ in ScaLAPACK? + int naroc[2]; + + // loop over all processors + for (int iprow = 0; iprow < pv.dim0; ++iprow) + { + for (int ipcol = 0; ipcol < pv.dim1; ++ipcol) + { + // get the rank of the processor at the given coordinate + int rank_at_coord; + const int mpi_cart_coord[2] = {iprow, ipcol}; + mpi_info = MPI_Cart_rank(pv.comm_2D, mpi_cart_coord, &rank_at_coord); // get the MPI rank + + // keep in mind present function is concurrently called by all processors, thus + // the following code block will only be executed once for each processor, which means + // for each processor, get its MPI rank and MPI coord, then assign the naroc[0] and naroc[1] + // with the value which should have been calculated automatically by ScaLAPACK function + // numroc_. + if (rank == rank_at_coord) + { + BlasConnector::copy(pv.nloc_wfc, lowf_2d, mem_stride, lowf_block.data(), mem_stride); + naroc[0] = pv.nrow; + naroc[1] = pv.ncol_bands; + } + + // broadcast the number of row and column + mpi_info = MPI_Bcast(naroc, 2, MPI_INT, rank_at_coord, pv.comm_2D); + + // broadcast the data, this means the data owned by one processor is broadcast + // to all other processors in the communicator. + mpi_info = MPI_Bcast(lowf_block.data(), buf_size, mpi_dtype, rank_at_coord, pv.comm_2D); + + // then use it to set the wfc_grid. + mpi_info = this->set_wfc_grid(naroc, + pv.nb, + pv.dim0, + pv.dim1, + iprow, + ipcol, + lowf_block.data(), + lowf_grid, + trace_lo); + // this operation will let all processors have the same wfc_grid + } + } + ModuleBase::timer::tick("Local_Orbital_wfc", "wfc_2d_to_grid"); +} + +template void IState_Envelope::wfc_2d_to_grid(const double* lowf_2d, + const Parallel_Orbitals& pv, + double** lowf_grid, + const std::vector& trace_lo); +template void IState_Envelope::wfc_2d_to_grid(const std::complex* lowf_2d, + const Parallel_Orbitals& pv, + std::complex** lowf_grid, + const std::vector& trace_lo); +#endif + +int IState_Envelope::globalIndex(int localindex, int nblk, int nprocs, int myproc) +{ + int iblock, gIndex; + iblock = localindex / nblk; + gIndex = (iblock * nprocs + myproc) * nblk + localindex % nblk; + return gIndex; +} + +int IState_Envelope::localIndex(int globalindex, int nblk, int nprocs, int& myproc) +{ + myproc = int((globalindex % (nblk * nprocs)) / nblk); + return int(globalindex / (nblk * nprocs)) * nblk + globalindex % nblk; +} \ No newline at end of file diff --git a/source/module_io/istate_envelope.h b/source/module_io/istate_envelope.h index 0e3a9e49e5..1695927908 100644 --- a/source/module_io/istate_envelope.h +++ b/source/module_io/istate_envelope.h @@ -1,9 +1,9 @@ #ifndef ISTATE_ENVELOPE_H #define ISTATE_ENVELOPE_H +#include "module_basis/module_ao/parallel_orbitals.h" #include "module_basis/module_pw/pw_basis_k.h" #include "module_cell/klist.h" #include "module_elecstate/elecstate.h" -#include "module_hamilt_lcao/hamilt_lcaodft/local_orbital_wfc.h" #include "module_hamilt_lcao/module_gint/gint_gamma.h" #include "module_hamilt_lcao/module_gint/gint_k.h" #include "module_hamilt_pw/hamilt_pwdft/structure_factor.h" @@ -13,7 +13,7 @@ class IState_Envelope { public: - IState_Envelope(const elecstate::ElecState* pes_in); + IState_Envelope(const elecstate::ElecState* pes); ~IState_Envelope(); /// for gamma_only @@ -21,7 +21,7 @@ class IState_Envelope const ModulePW::PW_Basis* rhopw, const ModulePW::PW_Basis_K* wfcpw, const ModulePW::PW_Basis_Big* bigpw, - Local_Orbital_wfc& lowf, + const Parallel_Orbitals& para_orb, Gint_Gamma& gg, int& out_wfc_pw, int& out_wfc_r, @@ -39,7 +39,7 @@ class IState_Envelope const ModulePW::PW_Basis* rhopw, const ModulePW::PW_Basis_K* wfcpw, const ModulePW::PW_Basis_Big* bigpw, - Local_Orbital_wfc& lowf, + const Parallel_Orbitals& para_orb, Gint_k& gg, int& out_wfc_pw, int& out_wfc_r, @@ -59,7 +59,7 @@ class IState_Envelope const ModulePW::PW_Basis* rhopw, const ModulePW::PW_Basis_K* wfcpw, const ModulePW::PW_Basis_Big* bigpw, - Local_Orbital_wfc& lowf, + const Parallel_Orbitals& para_orb, Gint_k& gk, int& out_wfc_pw, int& out_wfc_r, @@ -77,7 +77,7 @@ class IState_Envelope const ModulePW::PW_Basis* rhopw, const ModulePW::PW_Basis_K* wfcpw, const ModulePW::PW_Basis_Big* bigpw, - Local_Orbital_wfc& lowf, + const Parallel_Orbitals& para_orb, Gint_Gamma& gk, int& out_wfc_pw, int& out_wfc_r, @@ -103,5 +103,22 @@ class IState_Envelope const int& nspin, const double* const* const rho, psi::Psi>& wfc_g); + static int globalIndex(int localindex, int nblk, int nprocs, int myproc); + static int localIndex(int globalindex, int nblk, int nprocs, int& myproc); + +#ifdef __MPI + template + int set_wfc_grid(const int naroc[2], + const int nb, + const int dim0, + const int dim1, + const int iprow, + const int ipcol, + const T* in, + T** out, + const std::vector& trace_lo); + template + void wfc_2d_to_grid(const T* wfc_2d, const Parallel_Orbitals& pv, T** wfc_grid, const std::vector& trace_lo); +#endif }; #endif diff --git a/source/module_io/read_wfc_lcao.cpp b/source/module_io/read_wfc_lcao.cpp index 25bc99cb3a..20766203de 100644 --- a/source/module_io/read_wfc_lcao.cpp +++ b/source/module_io/read_wfc_lcao.cpp @@ -6,14 +6,20 @@ #include #include #include +#include #ifdef __MPI #include "module_base/parallel_common.h" #endif template -void ModuleIO::read_abacus_lowf(const std::string& flowf, int& ik, ModuleBase::Vector3& kvec_c, int& nbands, - int& nbasis, std::vector>& lowf, std::vector& ekb, +void ModuleIO::read_abacus_lowf(const std::string& flowf, + int& ik, + ModuleBase::Vector3& kvec_c, + int& nbands, + int& nbasis, + std::vector>& lowf, + std::vector& ekb, std::vector& occ, double& wk) //<[out] wavefunction coefficients { @@ -101,16 +107,34 @@ void ModuleIO::read_abacus_lowf(const std::string& flowf, int& ik, ModuleBase::V #endif } // instantiate the template function -template void ModuleIO::read_abacus_lowf(const std::string& flowf, int& ik, ModuleBase::Vector3& kvec_c, - int& nbands, int& nbasis, std::vector>& lowf, - std::vector& ekb, std::vector& occ, double& wk); -template void ModuleIO::read_abacus_lowf(const std::string& flowf, int& ik, ModuleBase::Vector3& kvec_c, - int& nbands, int& nbasis, std::vector>& lowf, - std::vector& ekb, std::vector& occ, double& wk); +template void ModuleIO::read_abacus_lowf(const std::string& flowf, + int& ik, + ModuleBase::Vector3& kvec_c, + int& nbands, + int& nbasis, + std::vector>& lowf, + std::vector& ekb, + std::vector& occ, + double& wk); +template void ModuleIO::read_abacus_lowf(const std::string& flowf, + int& ik, + ModuleBase::Vector3& kvec_c, + int& nbands, + int& nbasis, + std::vector>& lowf, + std::vector& ekb, + std::vector& occ, + double& wk); template -void ModuleIO::read_abacus_lowf(const std::string& flowf, int& ik, ModuleBase::Vector3& kvec_c, int& nbands, - int& nbasis, std::vector& lowf, std::vector& ekb, std::vector& occ, +void ModuleIO::read_abacus_lowf(const std::string& flowf, + int& ik, + ModuleBase::Vector3& kvec_c, + int& nbands, + int& nbasis, + std::vector& lowf, + std::vector& ekb, + std::vector& occ, double& wk) { std::ifstream ifs(flowf.c_str()); @@ -190,19 +214,37 @@ void ModuleIO::read_abacus_lowf(const std::string& flowf, int& ik, ModuleBase::V #endif } // instantiate the template function -template void ModuleIO::read_abacus_lowf(const std::string& flowf, int& ik, ModuleBase::Vector3& kvec_c, - int& nbands, int& nbasis, std::vector& lowf, std::vector& ekb, - std::vector& occ, double& wk); -template void ModuleIO::read_abacus_lowf(const std::string& flowf, int& ik, ModuleBase::Vector3& kvec_c, - int& nbands, int& nbasis, std::vector& lowf, std::vector& ekb, - std::vector& occ, double& wk); +template void ModuleIO::read_abacus_lowf(const std::string& flowf, + int& ik, + ModuleBase::Vector3& kvec_c, + int& nbands, + int& nbasis, + std::vector& lowf, + std::vector& ekb, + std::vector& occ, + double& wk); +template void ModuleIO::read_abacus_lowf(const std::string& flowf, + int& ik, + ModuleBase::Vector3& kvec_c, + int& nbands, + int& nbasis, + std::vector& lowf, + std::vector& ekb, + std::vector& occ, + double& wk); #ifdef __MPI template void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the file name to be WFC_NAO_K*.txt? - const Parallel_2D& p2d, const int& nks, int& nbands, int& nbasis, - std::vector& lowf_loc, std::vector& ekb, std::vector& occ, - std::vector>& kvec_c, std::vector& wk) + const Parallel_2D& p2d, + const int& nks, + int& nbands, + int& nbasis, + std::vector& lowf_loc, + std::vector& ekb, + std::vector& occ, + std::vector>& kvec_c, + std::vector& wk) { // reset vectors lowf_loc.clear(); @@ -210,6 +252,10 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi occ.clear(); kvec_c.clear(); wk.clear(); + const bool gamma_only = std::is_same::value || std::is_same::value; + const bool multi_k = std::is_same>::value || std::is_same>::value; + assert(gamma_only || multi_k); + const std::string flowf_prefix = gamma_only ? "WFC_GAMMA" : "WFC_NAO_K"; // MPI-related variables init int iproc; MPI_Comm_rank(p2d.comm_2D, &iproc); @@ -218,7 +264,7 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi for (int ik = 0; ik < nks; ik++) { // check existence of file - const std::string flowf = out_dir + "/WFC_NAO_K" + std::to_string(ik + 1) + ".txt"; + const std::string flowf = out_dir + "/" + flowf_prefix + std::to_string(ik + 1) + ".txt"; std::ifstream ifs(flowf); if (!ifs) ModuleBase::WARNING_QUIT("restart_from_file", "open file failed: " + flowf); @@ -250,8 +296,17 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi Parallel_Common::bcast_int(nbasis); p2d_glb.init(nbasis, nbands, std::max(nbasis, nbands), p2d.comm_2D); // in the same comm world lowf_loc_k.resize(p2d.nrow * p2d.ncol); - Cpxgemr2d(nbasis, nbands, lowf_glb.data(), 1, 1, const_cast(p2d_glb.desc), lowf_loc_k.data(), 1, 1, - const_cast(p2d.desc), p2d_glb.blacs_ctxt); + Cpxgemr2d(nbasis, + nbands, + lowf_glb.data(), + 1, + 1, + const_cast(p2d_glb.desc), + lowf_loc_k.data(), + 1, + 1, + const_cast(p2d.desc), + p2d_glb.blacs_ctxt); // append to the global lowf_loc lowf_loc.insert(lowf_loc.end(), lowf_loc_k.begin(), lowf_loc_k.end()); } @@ -276,29 +331,58 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi } } // instantiate the template function -template void ModuleIO::restart_from_file(const std::string& out_dir, const Parallel_2D& p2d, const int& nks, - int& nbands, int& nbasis, std::vector& lowf_loc, - std::vector& ekb, std::vector& occ, - std::vector>& kvec_c, std::vector& wk); -template void ModuleIO::restart_from_file(const std::string& out_dir, const Parallel_2D& p2d, const int& nks, - int& nbands, int& nbasis, std::vector& lowf_loc, - std::vector& ekb, std::vector& occ, - std::vector>& kvec_c, std::vector& wk); -template void ModuleIO::restart_from_file(const std::string& out_dir, const Parallel_2D& p2d, const int& nks, - int& nbands, int& nbasis, std::vector>& lowf_loc, - std::vector& ekb, std::vector& occ, - std::vector>& kvec_c, std::vector& wk); -template void ModuleIO::restart_from_file(const std::string& out_dir, const Parallel_2D& p2d, const int& nks, - int& nbands, int& nbasis, std::vector>& lowf_loc, - std::vector& ekb, std::vector& occ, - std::vector>& kvec_c, std::vector& wk); +template void ModuleIO::restart_from_file(const std::string& out_dir, + const Parallel_2D& p2d, + const int& nks, + int& nbands, + int& nbasis, + std::vector& lowf_loc, + std::vector& ekb, + std::vector& occ, + std::vector>& kvec_c, + std::vector& wk); +template void ModuleIO::restart_from_file(const std::string& out_dir, + const Parallel_2D& p2d, + const int& nks, + int& nbands, + int& nbasis, + std::vector& lowf_loc, + std::vector& ekb, + std::vector& occ, + std::vector>& kvec_c, + std::vector& wk); +template void ModuleIO::restart_from_file(const std::string& out_dir, + const Parallel_2D& p2d, + const int& nks, + int& nbands, + int& nbasis, + std::vector>& lowf_loc, + std::vector& ekb, + std::vector& occ, + std::vector>& kvec_c, + std::vector& wk); +template void ModuleIO::restart_from_file(const std::string& out_dir, + const Parallel_2D& p2d, + const int& nks, + int& nbands, + int& nbasis, + std::vector>& lowf_loc, + std::vector& ekb, + std::vector& occ, + std::vector>& kvec_c, + std::vector& wk); #endif template void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the file name to be WFC_NAO_K*.txt? - const int& nks, int& nbands, int& nbasis, std::vector& lowf, - std::vector& ekb, std::vector& occ, - std::vector>& kvec_c, std::vector& wk) + const int& nks, + int& nbands, + int& nbasis, + std::vector& lowf, + std::vector& ekb, + std::vector& occ, + std::vector>& kvec_c, + std::vector& wk) { // reset vectors lowf.clear(); @@ -306,11 +390,15 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi occ.clear(); kvec_c.clear(); wk.clear(); + const bool gamma_only = std::is_same::value || std::is_same::value; + const bool multi_k = std::is_same>::value || std::is_same>::value; + assert(gamma_only || multi_k); + const std::string flowf_prefix = gamma_only ? "WFC_GAMMA" : "WFC_NAO_K"; int nbands_ = -1, nbasis_ = -1; for (int ik = 0; ik < nks; ik++) { // check existence of file - const std::string flowf = out_dir + "/WFC_NAO_K" + std::to_string(ik + 1) + ".txt"; + const std::string flowf = out_dir + "/" + flowf_prefix + std::to_string(ik + 1) + ".txt"; const std::ifstream ifs(flowf); if (!ifs) ModuleBase::WARNING_QUIT("restart_from_file", "open file failed: " + flowf); @@ -339,17 +427,39 @@ void ModuleIO::restart_from_file(const std::string& out_dir, // hard-code the fi assert(lowf.size() == nks * nbands * nbasis); } // instantiate the template function -template void ModuleIO::restart_from_file(const std::string& out_dir, const int& nks, int& nbands, int& nbasis, - std::vector& lowf, std::vector& ekb, std::vector& occ, - std::vector>& kvec_c, std::vector& wk); -template void ModuleIO::restart_from_file(const std::string& out_dir, const int& nks, int& nbands, int& nbasis, - std::vector& lowf, std::vector& ekb, std::vector& occ, - std::vector>& kvec_c, std::vector& wk); -template void ModuleIO::restart_from_file(const std::string& out_dir, const int& nks, int& nbands, int& nbasis, - std::vector>& lowf, std::vector& ekb, - std::vector& occ, std::vector>& kvec_c, +template void ModuleIO::restart_from_file(const std::string& out_dir, + const int& nks, + int& nbands, + int& nbasis, + std::vector& lowf, + std::vector& ekb, + std::vector& occ, + std::vector>& kvec_c, + std::vector& wk); +template void ModuleIO::restart_from_file(const std::string& out_dir, + const int& nks, + int& nbands, + int& nbasis, + std::vector& lowf, + std::vector& ekb, + std::vector& occ, + std::vector>& kvec_c, + std::vector& wk); +template void ModuleIO::restart_from_file(const std::string& out_dir, + const int& nks, + int& nbands, + int& nbasis, + std::vector>& lowf, + std::vector& ekb, + std::vector& occ, + std::vector>& kvec_c, std::vector& wk); -template void ModuleIO::restart_from_file(const std::string& out_dir, const int& nks, int& nbands, int& nbasis, - std::vector>& lowf, std::vector& ekb, - std::vector& occ, std::vector>& kvec_c, +template void ModuleIO::restart_from_file(const std::string& out_dir, + const int& nks, + int& nbands, + int& nbasis, + std::vector>& lowf, + std::vector& ekb, + std::vector& occ, + std::vector>& kvec_c, std::vector& wk);