Merge branch 'gpuNewAPI_AdjJaco' into gpuNewAPI_fullSupport

PennyLaneAI · Sep 17, 2024 · bfcda74 · bfcda74
2 parents ebe960d + faead9c
commit bfcda74
Show file tree

Hide file tree

Showing 18 changed files with 176 additions and 166 deletions.
diff --git a/pennylane_lightning/core/_measurements_base.py b/pennylane_lightning/core/_measurements_base.py
@@ -130,16 +130,37 @@ def expval(self, measurementprocess: MeasurementProcess):
             measurementprocess.obs.name, measurementprocess.obs.wires
         )
 
-    @abstractmethod
+    def _probs_retval_conversion(self, probs_results: Any) -> np.ndarray:
+        """Convert the data structure from the C++ backend to a common structure through lightning devices.
+        Args:
+            probs_result (Any): Result provided by C++ backend.
+        Returns:
+            np.ndarray with probabilities of the supplied observable or wires.
+        """
+        return probs_results
+
     def probs(self, measurementprocess: MeasurementProcess):
         """Probabilities of the supplied observable or wires contained in the MeasurementProcess.
 
         Args:
-            measurementprocess (StateMeasurement): measurement to apply to the state
+            measurementprocess (StateMeasurement): measurement to apply to the state.
 
         Returns:
-            Probabilities of the supplied observable or wires
+            Probabilities of the supplied observable or wires.
         """
+        diagonalizing_gates = measurementprocess.diagonalizing_gates()
+
+        if diagonalizing_gates:
+            self._qubit_state.apply_operations(diagonalizing_gates)
+
+        results = self._measurement_lightning.probs(measurementprocess.wires.tolist())
+
+        if diagonalizing_gates:
+            self._qubit_state.apply_operations(
+                [qml.adjoint(g, lazy=False) for g in reversed(diagonalizing_gates)]
+            )
+
+        return self._probs_retval_conversion(results)
 
     def var(self, measurementprocess: MeasurementProcess):
         """Variance of the supplied observable contained in the MeasurementProcess.

diff --git a/pennylane_lightning/core/_state_vector_base.py b/pennylane_lightning/core/_state_vector_base.py
@@ -16,7 +16,7 @@
 """
 
 from abc import ABC, abstractmethod
-from typing import Union
+from typing import Optional, Union
 
 import numpy as np
 from pennylane import BasisState, StatePrep
@@ -35,16 +35,20 @@ class LightningBaseStateVector(ABC):
         num_wires(int): the number of wires to initialize the device with
         dtype: Datatypes for state-vector representation. Must be one of
             ``np.complex64`` or ``np.complex128``. Default is ``np.complex128``
+        sync Optional(bool): immediately sync with host-sv after applying operation.
     """
 
-    def __init__(self, num_wires: int, dtype: Union[np.complex128, np.complex64]):
+    def __init__(
+        self, num_wires: int, dtype: Union[np.complex128, np.complex64], sync: Optional[bool] = None
+    ):
 
         if dtype not in [np.complex64, np.complex128]:
             raise TypeError(f"Unsupported complex type: {dtype}")
 
         self._num_wires = num_wires
         self._wires = Wires(range(num_wires))
         self._dtype = dtype
+        self._base_sync = sync
 
         # Dummy for the device name
         self._device_name = None
@@ -96,28 +100,32 @@ def _state_dtype(self):
         Returns: the state vector class
         """
 
-    def reset_state(self):
+    def reset_state(self, sync: Optional[bool] = None):
         """Reset the device's state"""
         # init the state vector to |00..0>
-        self._qubit_state.resetStateVector()
+        if sync == None:
+            self._qubit_state.resetStateVector()
+        else:
+            self._qubit_state.resetStateVector(sync)
 
     @abstractmethod
-    def _apply_state_vector(self, state, device_wires: Wires):
+    def _apply_state_vector(self, state, device_wires: Wires, sync: Optional[bool] = None):
         """Initialize the internal state vector in a specified state.
         Args:
             state (array[complex]): normalized input state of length ``2**len(wires)``
                 or broadcasted state of shape ``(batch_size, 2**len(wires))``
             device_wires (Wires): wires that get initialized in the state
         """
 
-    def _apply_basis_state(self, state, wires):
+    def _apply_basis_state(self, state, wires, use_async: Optional[bool] = None):
         """Initialize the state vector in a specified computational basis state.
 
         Args:
             state (array[int]): computational basis state of shape ``(wires,)``
                 consisting of 0s and 1s.
             wires (Wires): wires that the provided computational state should be
                 initialized on
+            use_async(Optional[bool]): immediately sync with host-sv after applying operation.
 
         Note: This function does not support broadcasted inputs yet.
         """
@@ -128,7 +136,10 @@ def _apply_basis_state(self, state, wires):
             raise ValueError("BasisState parameter and wires must be of equal length.")
 
         # Return a computational basis state over all wires.
-        self._qubit_state.setBasisState(list(state), list(wires))
+        if use_async == None:
+            self._qubit_state.setBasisState(list(state), list(wires))
+        else:
+            self._qubit_state.setBasisState(list(state), list(wires), use_async)
 
     @abstractmethod
     def _apply_lightning_controlled(self, operation):
@@ -185,7 +196,9 @@ def apply_operations(
                 self._apply_state_vector(operations[0].parameters[0].copy(), operations[0].wires)
                 operations = operations[1:]
             elif isinstance(operations[0], BasisState):
-                self._apply_basis_state(operations[0].parameters[0], operations[0].wires)
+                self._apply_basis_state(
+                    operations[0].parameters[0], operations[0].wires, self._base_sync
+                )
                 operations = operations[1:]
         self._apply_lightning(
             operations, mid_measurements=mid_measurements, postselect_mode=postselect_mode

diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py
@@ -16,4 +16,4 @@
    Version number (major.minor.patch[-label])
 """
 
-__version__ = "0.39.0-dev18"
+__version__ = "0.39.0-dev25"
diff --git a/pennylane_lightning/core/lightning_newAPI_base.py b/pennylane_lightning/core/lightning_newAPI_base.py
@@ -68,6 +68,7 @@ def __init__(  # pylint: disable=too-many-arguments
 
         self._c_dtype = c_dtype
         self._batch_obs = batch_obs
+        self._sync = None
 
         if isinstance(wires, int):
             self._wire_map = None  # should just use wires as is
@@ -133,7 +134,7 @@ def jacobian(
         """
         if wire_map is not None:
             [circuit], _ = qml.map_wires(circuit, wire_map)
-        state.reset_state()
+        state.reset_state(self._sync)
         final_state = state.get_final_state(circuit)
         return self.LightningAdjointJacobian(final_state, batch_obs=batch_obs).calculate_jacobian(
             circuit
@@ -191,7 +192,7 @@ def vjp(  # pylint: disable=too-many-arguments
         """
         if wire_map is not None:
             [circuit], _ = qml.map_wires(circuit, wire_map)
-        state.reset_state()
+        state.reset_state(self._sync)
         final_state = state.get_final_state(circuit)
         return self.LightningAdjointJacobian(final_state, batch_obs=batch_obs).calculate_vjp(
             circuit, cotangents

diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp
@@ -184,6 +184,42 @@ class StateVectorCudaManaged
                            stream_id);
     }
 
+    /**
+     * @brief Prepare a single computational basis state.
+     *
+     * @param state Binary number representing the index
+     * @param wires Wires.
+     * @param use_async(Optional[bool]): immediately sync with host-sv after
+     applying operation.
+
+     */
+    void setBasisState(const std::vector<std::size_t> &state,
+                       const std::vector<std::size_t> &wires,
+                       const bool use_async) {
+        PL_ABORT_IF_NOT(state.size() == wires.size(),
+                        "state and wires must have equal dimensions.");
+        const auto num_qubits = BaseType::getNumQubits();
+        PL_ABORT_IF_NOT(
+            std::find_if(wires.begin(), wires.end(),
+                         [&num_qubits](const auto i) {
+                             return i >= num_qubits;
+                         }) == wires.end(),
+            "wires must take values lower than the number of qubits.");
+        const auto n_wires = wires.size();
+        std::size_t index{0U};
+        for (std::size_t k = 0; k < n_wires; k++) {
+            const auto bit = static_cast<std::size_t>(state[k]);
+            index |= bit << (num_qubits - 1 - wires[k]);
+        }
+
+        BaseType::getDataBuffer().zeroInit();
+        const std::complex<PrecisionT> value(1, 0);
+        CFP_t value_cu = cuUtil::complexToCu<std::complex<Precision>>(value);
+        auto stream_id = BaseType::getDataBuffer().getDevTag().getStreamID();
+        setBasisState_CUDA(BaseType::getData(), value_cu, index, use_async,
+                           stream_id);
+    }
+
     /**
      * @brief Set values for a batch of elements of the state-vector. This
      * method is implemented by the customized CUDA kernel defined in the

diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindings.hpp
@@ -82,13 +82,21 @@ void registerBackendClassSpecificBindings(PyClass &pyclass) {
                                     static_cast<std::size_t>(arr.size()));
         }))
         .def(
-            "setBasisState",
-            [](StateVectorT &sv, const std::size_t index,
-               const bool use_async) {
+            "setBasisStateZero",
+            [](StateVectorT &sv, const bool use_async) {
                 const std::complex<PrecisionT> value(1, 0);
-                sv.setBasisState(value, index, use_async);
+                std::size_t zero{0U};
+                sv.setBasisState(value, zero, use_async);
             },
-            "Create Basis State on GPU.")
+            "Create Basis State to zero on GPU.")
+        .def(
+            "setBasisState",
+            [](StateVectorT &sv, const std::vector<std::size_t> &state,
+               const std::vector<std::size_t> &wires, const bool use_async) {
+                sv.setBasisState(state, wires, use_async);
+            },
+            "Set the state vector to a basis state on GPU.")
+
         .def(
             "setStateVector",
             [](StateVectorT &sv, const np_arr_sparse_ind &indices,
@@ -152,7 +160,7 @@ void registerBackendClassSpecificBindings(PyClass &pyclass) {
              "Get the GPU index for the statevector data.")
         .def("numQubits", &StateVectorT::getNumQubits)
         .def("dataLength", &StateVectorT::getLength)
-        .def("resetGPU", &StateVectorT::initSV)
+        .def("resetStateVector", &StateVectorT::initSV)
         .def(
             "apply",
             [](StateVectorT &sv, const std::string &str,

diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/bindings/LGPUBindingsMPI.hpp
@@ -155,7 +155,7 @@ void registerBackendClassSpecificBindingsMPI(PyClass &pyclass) {
              "Get the GPU index for the statevector data.")
         .def("numQubits", &StateVectorT::getNumQubits)
         .def("dataLength", &StateVectorT::getLength)
-        .def("resetGPU", &StateVectorT::initSV)
+        .def("resetStateVector", &StateVectorT::initSV)
         .def(
             "apply",
             [](StateVectorT &sv, const std::string &str,

diff --git a/pennylane_lightning/lightning_gpu/_adjoint_jacobian.py b/pennylane_lightning/lightning_gpu/_adjoint_jacobian.py
@@ -38,8 +38,9 @@
         warn(str(ex), UserWarning)
         MPI_SUPPORT = False
 
-except (ImportError, ValueError) as ex:
+except ImportError as ex:
     warn(str(ex), UserWarning)
+    pass
 
 import numpy as np
 from pennylane.tape import QuantumTape

diff --git a/pennylane_lightning/lightning_gpu/_measurements.py b/pennylane_lightning/lightning_gpu/_measurements.py
@@ -34,7 +34,7 @@
 
     pass
 
-from typing import List
+from typing import Any, List
 
 import numpy as np
 import pennylane as qml
@@ -109,9 +109,9 @@ def _process_single_shot(samples):
                 len(wires), shots.total_shots
             ).astype(int, copy=False)
 
-        except ValueError as e:
-            if str(e) != "probabilities contain NaN":
-                raise e
+        except ValueError as ex:
+            if str(ex) != "probabilities contain NaN":
+                raise ex
             samples = qml.math.full((shots.total_shots, len(wires)), 0)
 
         self._apply_diagonalizing_gates(mps, adjoint=True)
@@ -127,30 +127,19 @@ def _process_single_shot(samples):
             tuple(zip(*processed_samples)) if shots.has_partitioned_shots else processed_samples[0]
         )
 
-    def probs(self, measurementprocess: MeasurementProcess):
-        """Probabilities of the supplied observable or wires contained in the MeasurementProcess.
+    def _probs_retval_conversion(self, probs_results: Any) -> np.ndarray:
+        """Convert the data structure from the C++ backend to a common structure through lightning devices.
 
         Args:
-            measurementprocess (StateMeasurement): measurement to apply to the state
+            probs_result (Any): Result provided by C++ backend.
 
         Returns:
-            Probabilities of the supplied observable or wires
+            np.ndarray with probabilities of the supplied observable or wires.
         """
-        diagonalizing_gates = measurementprocess.diagonalizing_gates()
-
-        if diagonalizing_gates:
-            self._qubit_state.apply_operations(diagonalizing_gates)
-
-        results = self._measurement_lightning.probs(measurementprocess.wires.tolist())
-
-        if diagonalizing_gates:
-            self._qubit_state.apply_operations(
-                [qml.adjoint(g, lazy=False) for g in reversed(diagonalizing_gates)]
-            )
 
         # Device returns as col-major orderings, so perform transpose on data for bit-index shuffle for now.
-        if len(results) > 0:
-            num_local_wires = len(results).bit_length() - 1 if len(results) > 0 else 0
-            return results.reshape([2] * num_local_wires).transpose().reshape(-1)
+        if len(probs_results) > 0:
+            num_local_wires = len(probs_results).bit_length() - 1 if len(probs_results) > 0 else 0
+            return probs_results.reshape([2] * num_local_wires).transpose().reshape(-1)
 
-        return results
+        return probs_results
diff --git a/pennylane_lightning/lightning_gpu/_mpi_handler.py b/pennylane_lightning/lightning_gpu/_mpi_handler.py
@@ -15,12 +15,15 @@
 This module contains the :class:`~.LightningGPU_MPIHandler` class, a MPI handler to use LightningGPU device with multi-GPU on multi-node system.
 """
 
+from warnings import warn
+
 try:
     # pylint: disable=no-name-in-module
     from pennylane_lightning.lightning_gpu_ops import DevTag, MPIManager
 
     MPI_SUPPORT = True
-except ImportError:
+except ImportError as ex:
+    print(str(ex), UserWarning)
     MPI_SUPPORT = False
 
 from typing import Callable, Union
@@ -29,8 +32,8 @@
 
 
 # MPI options
-class LightningGPU_MPIHandler:
-    """MPI handler for PennyLane Lightning GPU device
+class MPIHandler:
+    """MPI handler for PennyLane Lightning GPU device.
 
     MPI handler to use a GPU-backed Lightning device using NVIDIA cuQuantum SDK with parallel capabilities.
 
@@ -41,7 +44,7 @@ class LightningGPU_MPIHandler:
         mpi_buf_size (int): size of GPU memory (in MiB) set for MPI operation and its default value is 64 MiB.
         dev_pool (Callable): Method to handle the GPU devices available.
         num_wires (int): the number of wires to initialize the device with.
-        c_dtype (np.complex64, np.complex128): Datatypes for statevector representation
+        c_dtype (np.complex64, np.complex128): Datatypes for statevector representation.
     """
 
     def __init__(
@@ -76,20 +79,23 @@ def __init__(
             # set the number of global and local wires
             commSize = self._mpi_manager.getSize()
             self.num_global_wires = commSize.bit_length() - 1
-            self.num_local_wires = num_wires - self._num_global_wires
+            self.num_local_wires = num_wires - self.num_global_wires
 
-            # Memory size in bytes
-            sv_memsize = np.dtype(c_dtype).itemsize * (1 << self.num_local_wires)
-            if self._mebibytesToBytes(mpi_buf_size) > sv_memsize:
-                raise ValueError("The MPI buffer size is larger than the local state vector size.")
+            self._check_memory_size(c_dtype, mpi_buf_size)
 
         if not self.use_mpi:
             self.num_local_wires = num_wires
             self.num_global_wires = num_wires
 
-    def _mebibytesToBytes(mebibytes):
+    def _mebibytesToBytes(self, mebibytes):
         return mebibytes * 1024 * 1024
 
+    def _check_memory_size(self, c_dtype, mpi_buf_size):
+        # Memory size in bytes
+        sv_memsize = np.dtype(c_dtype).itemsize * (1 << self.num_local_wires)
+        if self._mebibytesToBytes(mpi_buf_size) > sv_memsize:
+            raise ValueError("The MPI buffer size is larger than the local state vector size.")
+
     def _mpi_init_helper(self, num_wires):
         """Set up MPI checks and initializations."""
 
@@ -112,7 +118,7 @@ def _mpi_init_helper(self, num_wires):
             )
 
         # set GPU device
-        rank = self._mpi_manager.getRank()
+        rank = mpi_manager.getRank()
         deviceid = rank % numProcsNode
         self._dp.setDeviceID(deviceid)
         devtag = DevTag(deviceid)