Add sparseH for LGPU #526

Merged · 52 commits · Oct 24, 2023

Changes from 1 commit

Commits
beaa17a
Init commit
multiphaseCFD Oct 20, 2023
97d3f85
Fix std::endl;
vincentmr Oct 20, 2023
705f549
Use more generic indices in base std::size_t.
vincentmr Oct 20, 2023
6781f3d
merge add_py_LGPUMPI
multiphaseCFD Oct 20, 2023
343c62a
add pybind layer
multiphaseCFD Oct 20, 2023
1c0bda2
add python layer
multiphaseCFD Oct 20, 2023
255f6d5
Quick and dirty spham bindings.
vincentmr Oct 20, 2023
25b90c8
Add sparse_ham serialization.
vincentmr Oct 20, 2023
e815e6f
Add sparse_ham tests in tests/test_adjoint_jacobian.py'
vincentmr Oct 20, 2023
7d8f0d9
Bug fix sparse product.
vincentmr Oct 20, 2023
002daee
Merge remote-tracking branch 'origin/add_sparseH_LGPU' into add_spars…
vincentmr Oct 20, 2023
71ebe4b
add sparseH
multiphaseCFD Oct 21, 2023
c06593d
Trigger CI
multiphaseCFD Oct 21, 2023
1cbc50f
Fix python bindings LGPU idxT
vincentmr Oct 23, 2023
5ba180a
Merge remote-tracking branch 'origin/add_sparseH_LGPU' into add_spars…
vincentmr Oct 23, 2023
6956e78
Fix serial tests and update changelog.
vincentmr Oct 23, 2023
b256ead
add more unit tests for sparseH base class
multiphaseCFD Oct 23, 2023
5813cf4
Fix tidy & sparse adjoint test device name.
vincentmr Oct 23, 2023
0045e0f
Merge branch 'add_sparseH_LGPU' into add_sparseH_LKokkos
vincentmr Oct 23, 2023
cc66546
Fix tidy warning for sparse_ham.
vincentmr Oct 23, 2023
dccfe3e
Send backend-specific ops in respective modules.
vincentmr Oct 23, 2023
a014177
Fix sparse_hamiltonianmpi_c and add getWires test.
vincentmr Oct 23, 2023
2f9d14b
Add sparseH diff capability in LQ.
vincentmr Oct 23, 2023
812a1c6
Add sparse Hamiltonian support for Lightning-Kokkos (#527)
vincentmr Oct 23, 2023
bf2bb3f
Merge branch 'add_sparseH_LQubit' into add_sparseH_LGPU
vincentmr Oct 23, 2023
8b2f752
Fix clang tidy
vincentmr Oct 23, 2023
cf2866b
Comment workflows but tidy.
vincentmr Oct 23, 2023
ccace74
Fix tidy warn
vincentmr Oct 23, 2023
ae615e4
Add override to sp::getWires
vincentmr Oct 23, 2023
4d108f3
Restore triggers
vincentmr Oct 23, 2023
f015e2f
Update tests_linux_x86_mpi.yml
vincentmr Oct 23, 2023
ca93578
Add constructibility tests.
vincentmr Oct 24, 2023
62850d7
Move L-Kokkos-CUDA tests to workflow call, called from tests_gpu_cu11…
vincentmr Oct 24, 2023
8777cbe
Merge remote-tracking branch 'origin/add_py_LGPUMPI' into add_sparseH…
vincentmr Oct 24, 2023
a72e2f7
Remove GPU deadlock.
vincentmr Oct 24, 2023
b7b81de
Merge remote-tracking branch 'origin/add_py_LGPUMPI' into add_sparseH…
vincentmr Oct 24, 2023
58f742b
Bug fix Python MPI.
vincentmr Oct 24, 2023
9535f48
Upload both outputs.
vincentmr Oct 24, 2023
6648230
Update gcc version in format.yml.
vincentmr Oct 24, 2023
d0125f3
Merge remote-tracking branch 'origin/add_py_LGPUMPI' into add_sparseH…
vincentmr Oct 24, 2023
200d81d
Update .github/CHANGELOG.md [skip ci]
vincentmr Oct 24, 2023
1a91db5
Update .github/workflows/tests_gpu_kokkos.yml [skip ci]
vincentmr Oct 24, 2023
049cc30
Merge remote-tracking branch 'origin/add_py_LGPUMPI' into add_sparseH…
vincentmr Oct 24, 2023
32236d1
rename argn [skip ci]
vincentmr Oct 24, 2023
999b2c2
Remove unused lines [skip ci]
multiphaseCFD Oct 24, 2023
e3f4854
Fix SparseHamiltonianBase::isEqual. [skip ci]
vincentmr Oct 24, 2023
c26c99b
Trigger CI
vincentmr Oct 24, 2023
031d3fb
Auto update version
github-actions[bot] Oct 24, 2023
4c319a9
Trigger CI
vincentmr Oct 24, 2023
3a53340
resolve comments
multiphaseCFD Oct 24, 2023
3df482b
rename dev_kokkos to dev
multiphaseCFD Oct 24, 2023
b7eb831
Fix tidy.
vincentmr Oct 24, 2023
101 changes: 101 additions & 0 deletions pennylane_lightning/core/src/observables/Observables.hpp
@@ -414,4 +414,105 @@ class HamiltonianBase : public Observable<StateVectorT> {
}
};

/**
* @brief Base class for a sparse Hamiltonian represented in CSR (compressed
* sparse row) format.
*
* @tparam StateVectorT State-vector class.
*/
template <class StateVectorT>
class SparseHamiltonianBase : public Observable<StateVectorT> {
public:
using PrecisionT = typename StateVectorT::PrecisionT;
using ComplexT = typename StateVectorT::ComplexT;
// cuSparse required index type
using IdxT =
typename std::conditional<std::is_same<PrecisionT, float>::value,
int32_t, int64_t>::type;

protected:
std::vector<ComplexT> data_;
std::vector<IdxT> indices_;
std::vector<IdxT> offsets_;
std::vector<std::size_t> wires_;

private:
[[nodiscard]] bool
isEqual(const Observable<StateVectorT> &other) const override {
const auto &other_cast =
static_cast<const SparseHamiltonianBase<StateVectorT> &>(other);

if (data_ != other_cast.data_ || indices_ != other_cast.indices_ ||
offsets_ != other_cast.offsets_) {
return false;
}

return true;
}

public:
/**
* @brief Create a SparseHamiltonianBase from data, indices and offsets in
* CSR format.
*
* @param arg1 Arguments to construct data
* @param arg2 Arguments to construct indices
* @param arg3 Arguments to construct offsets
* @param arg4 Arguments to construct wires
*/
template <typename T1, typename T2, typename T3 = T2,
typename T4 = std::vector<std::size_t>>
SparseHamiltonianBase(T1 &&arg1, T2 &&arg2, T3 &&arg3, T4 &&arg4)
: data_{std::forward<T1>(arg1)}, indices_{std::forward<T2>(arg2)},
offsets_{std::forward<T3>(arg3)}, wires_{std::forward<T4>(arg4)} {
PL_ASSERT(data_.size() == indices_.size());
}

/**
* @brief Convenient wrapper for the constructor as the constructor does not
* convert the std::shared_ptr with a derived class correctly.
*
* This function is useful as std::make_shared does not handle
* brace-enclosed initializer list correctly.
*
* @param arg1 Argument to construct data
* @param arg2 Argument to construct indices
* @param arg3 Argument to construct offsets
* @param arg4 Argument to construct wires
*/
static auto create(std::initializer_list<ComplexT> arg1,
std::initializer_list<IdxT> arg2,
std::initializer_list<IdxT> arg3,
std::initializer_list<std::size_t> arg4)
-> std::shared_ptr<SparseHamiltonianBase<StateVectorT>> {
return std::shared_ptr<SparseHamiltonianBase<StateVectorT>>(
new SparseHamiltonianBase<StateVectorT>{
std::move(arg1), std::move(arg2), std::move(arg3),
std::move(arg4)});
}

[[nodiscard]] auto getObsName() const -> std::string override {
using Pennylane::Util::operator<<;
std::ostringstream ss;
ss << "SparseHamiltonian: {\n'data' : \n";
for (const auto &d : data_)
// Note: for the LGPU backend, ComplexT is std::complex as of the 0.33
// release. Revisit this once ComplexT becomes cuComplex.
ss << "{" << d.real() << ", " << d.imag() << "}," << "\n";
ss << ",\n'indices' : \n";
for (const auto &i : indices_)
ss << i << ", ";
ss << ",\n'offsets' : \n";
for (const auto &o : offsets_)
ss << o << ", ";
ss << "\n}";
return ss.str();
}
/**
* @brief Get the wires the observable applies to.
*/
[[nodiscard]] auto getWires() const -> std::vector<size_t> override {
    return wires_;
}
};

} // namespace Pennylane::Observables
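For readers unfamiliar with CSR, here is a minimal, self-contained sketch (illustrative only, not part of the diff) of how the data/indices/offsets triple encodes a matrix and drives a matrix-vector product, a serial analogue of what the backends' applyInPlace implementations run on device:

#include <complex>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    using C = std::complex<double>;
    // CSR encoding of the 1-qubit Pauli-Z matrix [[1, 0], [0, -1]]:
    // row r keeps its non-zero values in data[offsets[r]..offsets[r+1]),
    // with the column of each value stored at the same position in indices.
    std::vector<C> data{{1.0, 0.0}, {-1.0, 0.0}};
    std::vector<int64_t> indices{0, 1};
    std::vector<int64_t> offsets{0, 1, 2};

    // y = H * x: the serial counterpart of SparseMV_cuSparse.
    std::vector<C> x{{0.6, 0.0}, {0.8, 0.0}};
    std::vector<C> y(x.size(), C{0.0, 0.0});
    for (std::size_t r = 0; r + 1 < offsets.size(); r++) {
        for (auto j = offsets[r]; j < offsets[r + 1]; j++) {
            y[r] += data[j] * x[indices[j]];
        }
    }
    std::cout << y[0] << " " << y[1] << "\n"; // prints (0.6,0) (-0.8,0)
}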
@@ -28,3 +28,6 @@ template class Observables::TensorProdObs<StateVectorCudaManaged<double>>;

template class Observables::Hamiltonian<StateVectorCudaManaged<float>>;
template class Observables::Hamiltonian<StateVectorCudaManaged<double>>;

template class Observables::SparseHamiltonian<StateVectorCudaManaged<float>>;
template class Observables::SparseHamiltonian<StateVectorCudaManaged<double>>;
@@ -209,4 +209,90 @@ class Hamiltonian final : public HamiltonianBase<StateVectorT> {
}
};

/**
* @brief Sparse representation of Hamiltonian<StateVectorT>
*
*/
template <class StateVectorT>
class SparseHamiltonian final : public SparseHamiltonianBase<StateVectorT> {
public:
using PrecisionT = typename StateVectorT::PrecisionT;
using ComplexT = typename StateVectorT::ComplexT;
// cuSparse required index type
using IdxT =
typename std::conditional<std::is_same<PrecisionT, float>::value,
int32_t, int64_t>::type;

private:
using BaseType = SparseHamiltonianBase<StateVectorT>;

public:
/**
* @brief Create a SparseHamiltonian from data, indices and offsets in CSR
* format.
*
* @param data Arguments to construct data
* @param indices Arguments to construct indices
* @param offsets Arguments to construct offsets
* @param wires Arguments to construct wires
*/
template <typename T1, typename T2, typename T3 = T2, typename T4>
explicit SparseHamiltonian(T1 &&data, T2 &&indices, T3 &&offsets,
T4 &&wires)
: BaseType{data, indices, offsets, wires} {}

/**
* @brief Convenient wrapper for the constructor as the constructor does not
* convert the std::shared_ptr with a derived class correctly.
*
* This function is useful as std::make_shared does not handle
* brace-enclosed initializer list correctly.
*
* @param data Argument to construct data
* @param indices Argument to construct indices
* @param offsets Argument to construct offsets
* @param wires Argument to construct wires
*/
static auto create(std::initializer_list<ComplexT> data,
std::initializer_list<IdxT> indices,
std::initializer_list<IdxT> offsets,
std::initializer_list<std::size_t> wires)
-> std::shared_ptr<SparseHamiltonian<StateVectorT>> {
return std::shared_ptr<SparseHamiltonian<StateVectorT>>(
new SparseHamiltonian<StateVectorT>{
std::move(data), std::move(indices), std::move(offsets),
std::move(wires)});
}

/**
* @brief Updates the statevector SV -> SV', where SV' = a*H*SV and H
* is a sparse Hamiltonian.
*
*/
void applyInPlace(StateVectorT &sv) const override {
PL_ABORT_IF_NOT(this->wires_.size() == sv.getNumQubits(),
"SparseH wire count does not match state-vector size");
using CFP_t = typename StateVectorT::CFP_t;

const std::size_t nIndexBits = sv.getNumQubits();
const std::size_t length = std::size_t{1} << nIndexBits;

auto device_id = sv.getDataBuffer().getDevTag().getDeviceID();
auto stream_id = sv.getDataBuffer().getDevTag().getStreamID();

cusparseHandle_t handle = sv.getCusparseHandle();

std::unique_ptr<DataBuffer<CFP_t>> d_sv_prime =
std::make_unique<DataBuffer<CFP_t>>(length, device_id, stream_id,
true);

SparseMV_cuSparse<IdxT, PrecisionT, CFP_t>(
this->offsets_.data(), this->offsets_.size(), this->indices_.data(),
this->data_.data(), this->data_.size(), sv.getData(),
d_sv_prime->getData(), device_id, stream_id, handle);

sv.updateData(std::move(d_sv_prime));
}
};

} // namespace Pennylane::LightningGPU::Observables
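As a hypothetical usage sketch (not part of the diff; assumes an existing one-qubit StateVectorCudaManaged<double> named sv), the observable above could be built and applied in place as follows. Note that for double precision, IdxT resolves to int64_t:

using StateVectorT = StateVectorCudaManaged<double>;
using ComplexT = StateVectorT::ComplexT;

auto ham = SparseHamiltonian<StateVectorT>::create(
    {ComplexT{1.0, 0.0}, ComplexT{-1.0, 0.0}}, // CSR values (data): Pauli-Z
    {0, 1},                                    // CSR columns (indices)
    {0, 1, 2},                                 // CSR row pointers (offsets)
    {0});                                      // wires
ham->applyInPlace(sv); // sv <- H * sv via cuSparse SpMV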
@@ -28,3 +28,6 @@ template class Observables::TensorProdObsMPI<StateVectorCudaMPI<double>>;

template class Observables::HamiltonianMPI<StateVectorCudaMPI<float>>;
template class Observables::HamiltonianMPI<StateVectorCudaMPI<double>>;

template class Observables::SparseHamiltonianMPI<StateVectorCudaMPI<float>>;
template class Observables::SparseHamiltonianMPI<StateVectorCudaMPI<double>>;
@@ -214,4 +214,160 @@ class HamiltonianMPI final : public HamiltonianBase<StateVectorT> {
}
};

/**
* @brief Sparse representation of Hamiltonian<StateVectorT>
*
*/
template <class StateVectorT>
class SparseHamiltonianMPI final : public SparseHamiltonianBase<StateVectorT> {
public:
using PrecisionT = typename StateVectorT::PrecisionT;
using ComplexT = typename StateVectorT::ComplexT;
// cuSparse required index type
using IdxT =
typename std::conditional<std::is_same<PrecisionT, float>::value,
int32_t, int64_t>::type;

private:
using BaseType = SparseHamiltonianBase<StateVectorT>;

public:
/**
* @brief Create a SparseHamiltonianMPI from data, indices and offsets in
* CSR format.
*
* @param data Arguments to construct data
* @param indices Arguments to construct indices
* @param offsets Arguments to construct offsets
* @param wires Arguments to construct wires
*/
template <typename T1, typename T2, typename T3 = T2, typename T4>
explicit SparseHamiltonianMPI(T1 &&data, T2 &&indices, T3 &&offsets,
T4 &&wires)
: BaseType{data, indices, offsets, wires} {}

/**
* @brief Convenient wrapper for the constructor as the constructor does not
* convert the std::shared_ptr with a derived class correctly.
*
* This function is useful as std::make_shared does not handle
* brace-enclosed initializer list correctly.
*
* @param data Argument to construct data
* @param indices Argument to construct indices
* @param offsets Argument to construct offsets
* @param wires Argument to construct wires
*/
static auto create(std::initializer_list<ComplexT> data,
std::initializer_list<IdxT> indices,
std::initializer_list<IdxT> offsets,
std::initializer_list<std::size_t> wires)
-> std::shared_ptr<SparseHamiltonianMPI<StateVectorT>> {
return std::shared_ptr<SparseHamiltonianMPI<StateVectorT>>(
new SparseHamiltonianMPI<StateVectorT>{
std::move(data), std::move(indices), std::move(offsets),
std::move(wires)});
}

/**
* @brief Updates the statevector SV -> SV', where SV' = a*H*SV and H
* is a sparse Hamiltonian.
*
*/
void applyInPlace(StateVectorT &sv) const override {
auto mpi_manager = sv.getMPIManager();
if (mpi_manager.getRank() == 0) {
PL_ABORT_IF_NOT(
this->wires_.size() == sv.getTotalNumQubits(),
"SparseH wire count does not match state-vector size");
}
using CFP_t = typename StateVectorT::CFP_t;

// Distribute sparse matrix across multi-nodes/multi-gpus
size_t num_rows = size_t{1} << sv.getTotalNumQubits();
size_t local_num_rows = size_t{1} << sv.getNumLocalQubits();

std::vector<std::vector<CSRMatrix<PrecisionT, IdxT>>> csrmatrix_blocks;

if (mpi_manager.getRank() == 0) {
csrmatrix_blocks = splitCSRMatrix<PrecisionT, IdxT>(
mpi_manager, num_rows, this->offsets_.data(),
this->indices_.data(), this->data_.data());
}
mpi_manager.Barrier();

std::vector<CSRMatrix<PrecisionT, IdxT>> localCSRMatVector;
for (size_t i = 0; i < mpi_manager.getSize(); i++) {
auto localCSRMat = scatterCSRMatrix<PrecisionT, IdxT>(
mpi_manager, csrmatrix_blocks[i], local_num_rows, 0);
localCSRMatVector.push_back(localCSRMat);
}

mpi_manager.Barrier();

auto device_id = sv.getDataBuffer().getDevTag().getDeviceID();
auto stream_id = sv.getDataBuffer().getDevTag().getStreamID();
cusparseHandle_t handle = sv.getCusparseHandle();

const size_t length_local = size_t{1} << sv.getNumLocalQubits();

std::unique_ptr<DataBuffer<CFP_t>> d_sv_prime =
std::make_unique<DataBuffer<CFP_t>>(length_local, device_id,
stream_id, true);
std::unique_ptr<DataBuffer<CFP_t>> d_tmp =
std::make_unique<DataBuffer<CFP_t>>(length_local, device_id,
stream_id, true);
d_sv_prime->zeroInit();
PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize());
mpi_manager.Barrier();

for (size_t i = 0; i < mpi_manager.getSize(); i++) {
size_t color = 0;
auto &localCSRMatrix = localCSRMatVector[i];

if (localCSRMatrix.getValues().size() != 0) {
color = 1;
SparseMV_cuSparse<IdxT, PrecisionT, CFP_t>(
localCSRMatrix.getCsrOffsets().data(),
localCSRMatrix.getCsrOffsets().size(),
localCSRMatrix.getColumns().data(),
localCSRMatrix.getValues().data(),
localCSRMatrix.getValues().size(), sv.getData(),
d_sv_prime->getData(), device_id, stream_id, handle);
}
PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize());
mpi_manager.Barrier();

if (mpi_manager.getRank() == i) {
color = 1;
if (localCSRMatrix.getValues().size() == 0) {
d_tmp->zeroInit();
}
}
PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize());
mpi_manager.Barrier();

auto new_mpi_manager =
mpi_manager.split(color, mpi_manager.getRank());
int reduce_root_rank = -1;

if (mpi_manager.getRank() == i) {
reduce_root_rank = new_mpi_manager.getRank();
}

mpi_manager.template Bcast<int>(reduce_root_rank, i);

if (new_mpi_manager.getComm() != MPI_COMM_NULL) {
new_mpi_manager.template Reduce<CFP_t>(
d_tmp->getData(), d_sv_prime->getData(), length_local,
reduce_root_rank, "sum");
}
PL_CUDA_IS_SUCCESS(cudaDeviceSynchronize());
mpi_manager.Barrier();
}
sv.CopyGpuDataToGpuIn(d_sv_prime->getData(), d_sv_prime->getLength());
mpi_manager.Barrier();
}
};

} // namespace Pennylane::LightningGPU::Observables
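The communicator handling in the loop above is dense, so here is a standalone sketch of the same pattern using plain MPI calls rather than the project's MPIManager wrapper (buffer contents are illustrative): in round i, participating ranks split into a sub-communicator, the owner of row-block i broadcasts its sub-communicator rank, and the partial products are summed onto it.

#include <mpi.h>
#include <vector>

int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);
    int rank, size;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    std::vector<double> partial(4, rank); // stand-in for d_tmp
    std::vector<double> result(4, 0.0);   // stand-in for d_sv_prime

    for (int i = 0; i < size; i++) {
        // The real code sets color = 1 only for ranks whose local CSR block
        // is non-empty (or for rank i itself); every rank participates here
        // to keep the sketch simple.
        int color = 1;
        MPI_Comm sub;
        MPI_Comm_split(MPI_COMM_WORLD, color, rank, &sub);

        int reduce_root_rank = -1; // rank i's index inside sub
        if (rank == i) {
            MPI_Comm_rank(sub, &reduce_root_rank);
        }
        MPI_Bcast(&reduce_root_rank, 1, MPI_INT, i, MPI_COMM_WORLD);

        // Sum everyone's partial product onto the owner of row-block i.
        MPI_Reduce(partial.data(), result.data(), 4, MPI_DOUBLE, MPI_SUM,
                   reduce_root_rank, sub);
        MPI_Comm_free(&sub);
    }
    MPI_Finalize();
    return 0;
}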