* Add final touches
* Fix typos
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update include/jet/CudaTensor.hpp (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update include/jet/TaskBasedContractor.hpp (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update include/jet/CudaTensor.hpp (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Add PR corrections
* Update examples/heterogeneous_contraction.cu (Co-authored-by: Mikhail Andrenkov <[email protected]>)
* Fix two minor issues

Co-authored-by: Trevor Vincent <[email protected]>
Co-authored-by: Mikhail Andrenkov <[email protected]>
1 parent d74502b, commit 3589078
Showing 4 changed files with 366 additions and 205 deletions.
examples/heterogeneous_contraction.cu
@@ -0,0 +1,261 @@
/**
 * @file heterogeneous_contraction.cu
 *
 * @brief Contracts three tensor network files on two GPUs
 * and one CPU simultaneously.
 *
 */

#include <complex>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <string>
#include <vector>

#include "CudaTensor.hpp"
#include "PathInfo.hpp"
#include "TaskBasedContractor.hpp"
#include "Tensor.hpp"
#include "TensorNetwork.hpp"
#include "TensorNetworkIO.hpp"

#include <cuComplex.h>
#include <taskflow/cudaflow.hpp>

using namespace Jet;

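/**
 * Bundles the per-device state of one cudaFlow contraction: the device
 * tensors created for every leaf and intermediate node, the cuTENSOR
 * contraction plans, the captured kernel tasks (used to express dependencies
 * between contraction steps), and a host-side copy of the final tensor data.
 * The object is captured by reference below, so it must outlive the taskflow
 * run that uses it.
 */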
template <typename T, int device = 0> struct CudaflowContractionTask {

    std::vector<std::unique_ptr<CudaTensor<T, device>>> tensors;
    std::vector<typename CudaTensor<T, device>::CudaContractionPlan> plans;
    std::vector<tf::cudaTask> kernel_tasks;
    std::vector<T> result;
};

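/**
 * Appends a single-GPU contraction of `tn` along `path_info` to `taskflow`.
 * The work happens in two phases: (1) on the host, allocate a CudaTensor for
 * every node in the contraction path and precompute a cuTENSOR contraction
 * plan for each pairwise contraction; (2) emplace a cudaFlow capturer pinned
 * to `device` that replays the contractions as GPU kernels, wiring up
 * dependencies so a step only runs once the intermediates it consumes have
 * been produced. Note that the body is written for T = cuComplex.
 */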
template <typename T, int device = 0>
void AddCudaContractionToTaskflow(
    const TensorNetwork<CudaTensor<T, device>> &tn,
    const PathInfo &path_info, tf::Taskflow &taskflow,
    CudaflowContractionTask<T, device> &gpu_task)
{
    auto &tensors = gpu_task.tensors;
    auto &plans = gpu_task.plans;
    auto &result = gpu_task.result;
    auto &kernel_tasks = gpu_task.kernel_tasks;

    const auto &path_node_info = path_info.GetSteps();
    const auto &path = path_info.GetPath();
    const auto &nodes = tn.GetNodes();
    size_t num_leafs = nodes.size();
    tensors.resize(path_node_info.size());
    plans.resize(path.size());

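    // Slots [0, num_leafs) hold the leaf tensors of the network; the
    // intermediate produced by contraction step i is stored at num_leafs + i.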
    for (size_t i = 0; i < path.size(); i++) {

        const PathStepInfo &pnia = path_node_info[path[i].first];
        const PathStepInfo &pnib = path_node_info[path[i].second];
        const PathStepInfo &pnic = path_node_info[num_leafs + i];

        if (pnia.id >= num_leafs) {
            tensors[path[i].first] =
                std::make_unique<CudaTensor<cuComplex, device>>(
                    CudaTensor<cuComplex, device>(pnia.tensor_indices,
                                                  pnia.shape));
        }
        else {
            tensors[path[i].first] =
                std::make_unique<CudaTensor<cuComplex, device>>(
                    CudaTensor<cuComplex, device>(
                        tn.GetNodes()[pnia.id].tensor));
        }

        if (pnib.id >= num_leafs) {
            tensors[path[i].second] =
                std::make_unique<CudaTensor<cuComplex, device>>(
                    CudaTensor<cuComplex, device>(pnib.tensor_indices,
                                                  pnib.shape));
        }
        else {
            tensors[path[i].second] =
                std::make_unique<CudaTensor<cuComplex, device>>(
                    CudaTensor<cuComplex, device>(
                        tn.GetNodes()[pnib.id].tensor));
        }

        tensors[num_leafs + i] =
            std::make_unique<CudaTensor<cuComplex, device>>(
                CudaTensor<cuComplex, device>(pnic.tensor_indices, pnic.shape));

        CudaTensor<cuComplex, device>::GetCudaContractionPlan(
            plans[i], *tensors[path[i].first], *tensors[path[i].second],
            *tensors[num_leafs + i]);
    }

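    // Emplace a cudaFlow capturer bound to `device`. Each contraction step
    // becomes a captured kernel running cutensorContraction; precede() edges
    // make a step wait for the intermediate tensors it consumes, and the last
    // step is followed by a device-to-host copy of the final result.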
    tf::Task task = taskflow.emplace_on(
        [&, path, path_node_info, num_leafs](tf::cudaFlowCapturer &capturer) {
            for (size_t i = 0; i < path.size(); i++) {

                const PathStepInfo &pnia = path_node_info[path[i].first];
                const PathStepInfo &pnib = path_node_info[path[i].second];
                const PathStepInfo &pnic = path_node_info[num_leafs + i];

                auto tensor_a = tensors[path[i].first]->GetData();
                auto tensor_b = tensors[path[i].second]->GetData();
                auto tensor_c = tensors[num_leafs + i]->GetData();

                auto &c_plan = plans[i];
                tf::cudaTask kernel =
                    capturer.on([&, c_plan, tensor_a, tensor_b,
                                 tensor_c](cudaStream_t stream) {
                        cuComplex alpha;
                        alpha.x = 1.;
                        alpha.y = 0.;

                        cuComplex beta;
                        beta.x = 0.;
                        beta.y = 0.;

                        cutensorContraction(&c_plan.handle, &c_plan.plan,
                                            &alpha, tensor_a, tensor_b, &beta,
                                            tensor_c, tensor_c, c_plan.work,
                                            c_plan.work_size, stream);
                    });

                kernel_tasks.push_back(kernel);

                if (pnia.id >= num_leafs) {
                    kernel_tasks[pnia.id - num_leafs].precede(kernel);
                }

                if (pnib.id >= num_leafs) {
                    kernel_tasks[pnib.id - num_leafs].precede(kernel);
                }

                // Copy the data of the final tensor from the GPU to the host.
                if (i == path.size() - 1) {
                    result.resize(tensors[pnic.id]->GetSize());
                    tf::cudaTask d2h = capturer.memcpy(
                        result.data(), tensors[pnic.id]->GetData(),
                        tensors[pnic.id]->GetSize() * sizeof(cuComplex));

                    kernel.precede(d2h);
                }
            }
        },
        device);
}

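/**
 * Loads three tensor network JSON files, contracts the first on GPU 0, the
 * second on GPU 1, and the third on the CPU, and prints the first entry of
 * each result.
 */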
int main(int argc, char *argv[])
{
    if (argc != 4) {
        std::cout << "heterogeneous_contraction.cu <tensor network file 1 on GPU 0> "
                     "<tensor network file 2 on GPU 1> <tensor network file 3 on CPU>"
                  << std::endl;
        std::cout << "Contracts three circuits on two GPUs and one CPU"
                  << std::endl;
        return 1;
    }

    std::string file_name_0 = argv[1];
    std::string file_name_1 = argv[2];
    std::string file_name_2 = argv[3];

    /*
     * Load first tensor network file onto GPU 0
     */

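    // Each file is read into a std::string and deserialized with
    // TensorNetworkSerializer into a TensorNetworkFile holding the tensors
    // and (optionally) a contraction path. The code below assumes a path is
    // present in every file, since tensor_file_N.path.value() is called
    // without checking.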
    TensorNetworkFile<CudaTensor<cuComplex, 0>> tensor_file_0;
    try {
        std::ifstream tn_data(file_name_0);
        std::string circuit_str{std::istreambuf_iterator<char>(tn_data),
                                std::istreambuf_iterator<char>()};
        // Load data into TensorNetwork and PathInfo objects
        TensorNetworkSerializer<CudaTensor<cuComplex, 0>> serializer;
        tensor_file_0 = serializer(circuit_str, true);
    }
    catch (...) {
        std::cerr << "Please specify a valid first JSON file to contract"
                  << std::endl;
        exit(1);
    }

    TensorNetwork<CudaTensor<cuComplex, 0>> tn_0 = tensor_file_0.tensors;
    PathInfo path_0 = tensor_file_0.path.value();

    /*
     * Load second tensor network file onto GPU 1
     */

    TensorNetworkFile<CudaTensor<cuComplex, 1>> tensor_file_1;
    try {
        std::ifstream tn_data(file_name_1);
        std::string circuit_str{std::istreambuf_iterator<char>(tn_data),
                                std::istreambuf_iterator<char>()};
        // Load data into TensorNetwork and PathInfo objects
        TensorNetworkSerializer<CudaTensor<cuComplex, 1>> serializer;
        tensor_file_1 = serializer(circuit_str, true);
    }
    catch (...) {
        std::cerr << "Please specify a valid second JSON file to contract"
                  << std::endl;
        exit(1);
    }

    TensorNetwork<CudaTensor<cuComplex, 1>> tn_1 = tensor_file_1.tensors;
    PathInfo path_1 = tensor_file_1.path.value();

    /*
     * Load third tensor network file onto CPU
     */

    TensorNetworkFile<Tensor<std::complex<float>>> tensor_file_2;
    try {
        std::ifstream tn_data(file_name_2);
        std::string circuit_str{std::istreambuf_iterator<char>(tn_data),
                                std::istreambuf_iterator<char>()};
        // Load data into TensorNetwork and PathInfo objects
        TensorNetworkSerializer<Tensor<std::complex<float>>> serializer;
        tensor_file_2 = serializer(circuit_str, true);
    }
    catch (...) {
        std::cerr << "Please specify a valid third JSON file to contract"
                  << std::endl;
        exit(1);
    }

    TensorNetwork<Tensor<std::complex<float>>> tn_2 = tensor_file_2.tensors;
    PathInfo path_2 = tensor_file_2.path.value();

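    // Build one host-side tf::Taskflow that carries both GPU cudaFlows, add
    // the CPU contraction tasks to a TaskBasedContractor, then hand the GPU
    // taskflow to the contractor via AddTaskflow() so a single Contract()
    // call drives all three devices.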
    tf::Taskflow taskflow;

    /* Set up the GPU 0 contraction task */
    CudaflowContractionTask<cuComplex, 0> gpu_task_0;
    AddCudaContractionToTaskflow<cuComplex, 0>(tn_0, path_0, taskflow,
                                               gpu_task_0);

    /* Set up the GPU 1 contraction task */
    CudaflowContractionTask<cuComplex, 1> gpu_task_1;
    AddCudaContractionToTaskflow<cuComplex, 1>(tn_1, path_1, taskflow,
                                               gpu_task_1);

    /* Set up the CPU contraction task */
    TaskBasedContractor<Tensor<std::complex<float>>> contractor;
    contractor.AddContractionTasks(tn_2, path_2);

    // Add the GPU task graph to the CPU task graph
    contractor.AddTaskflow(taskflow);

    /* Contract on all devices */
    contractor.Contract().wait();

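    // The GPU results were copied back to gpu_task_0.result and
    // gpu_task_1.result by the device-to-host memcpy tasks; the CPU result is
    // retrieved from the contractor.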
    /* Display results */
    auto result0 = gpu_task_0.result;
    std::cout << "GPU 0 result = " << result0[0].x << " " << result0[0].y
              << std::endl;

    auto result1 = gpu_task_1.result;
    std::cout << "GPU 1 result = " << result1[0].x << " " << result1[0].y
              << std::endl;

    auto result2 = contractor.GetResults()[0];
    std::cout << "CPU result = " << result2 << std::endl;

    return 0;
}