Skip to content

Commit

Permalink
ITS-GPU: Move Tracklet finder on GPU (#13737)
Browse files Browse the repository at this point in the history
* Fix hybrid vertexer printouts

* Move multiplicity mask to a vector<uint8_t>

* Add gpuSpan

* Debugging getSpan

* Checkpointing

* Fix access in tracklet finding

* Fix tracklet LUTs issue

* Debugging small discrepancies

* Fix bad PhiBins pick

* Add tracklet counting

* Fix indices for used clusters

* Add tracklet writing on the buffer

* tracklets on gpu

* Tracklet finder on GPU
  • Loading branch information
mconcas authored Dec 3, 2024
1 parent 4bffbfa commit 33b4212
Show file tree
Hide file tree
Showing 13 changed files with 848 additions and 607 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ struct FastMultEst {

static uint32_t getCurrentRandomSeed();
int selectROFs(const gsl::span<const o2::itsmft::ROFRecord> rofs, const gsl::span<const o2::itsmft::CompClusterExt> clus,
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<bool>& sel);
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<uint8_t>& sel);

void fillNClPerLayer(const gsl::span<const o2::itsmft::CompClusterExt>& clusters);
float process(const std::array<int, NLayers> ncl)
Expand Down
2 changes: 1 addition & 1 deletion Detectors/ITSMFT/ITS/reconstruction/src/FastMultEst.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ float FastMultEst::processNoiseImposed(const std::array<int, NLayers> ncl)
}

int FastMultEst::selectROFs(const gsl::span<const o2::itsmft::ROFRecord> rofs, const gsl::span<const o2::itsmft::CompClusterExt> clus,
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<bool>& sel)
const gsl::span<const o2::itsmft::PhysTrigger> trig, std::vector<uint8_t>& sel)
{
int nrof = rofs.size(), nsel = 0;
const auto& multEstConf = FastMultEstConfig::Instance(); // parameters for mult estimation and cuts
Expand Down
63 changes: 46 additions & 17 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,19 @@ class TimeFrameGPU : public TimeFrame
void initialise(const int, const TrackingParameters&, const int, IndexTableUtils* utils = nullptr, const TimeFrameGPUParameters* pars = nullptr);
void initDevice(IndexTableUtils*, const TrackingParameters& trkParam, const TimeFrameGPUParameters&, const int, const int);
void initDeviceSAFitting();
void loadIndexTableUtils(const int);
void loadTrackingFrameInfoDevice(const int);
void loadUnsortedClustersDevice(const int);
void loadClustersDevice(const int);
void loadClustersIndexTables(const int iteration);
void createUsedClustersDevice(const int);
void loadUsedClustersDevice();
void loadROframeClustersDevice(const int);
void loadMultiplicityCutMask(const int);
void loadVertices(const int);

///
void createTrackletsLUTDevice(const int);
void loadTrackletsDevice();
void loadTrackletsLUTDevice();
void loadCellsDevice();
Expand All @@ -62,6 +72,7 @@ class TimeFrameGPU : public TimeFrame
void loadTrackSeedsChi2Device();
void loadRoadsDevice();
void loadTrackSeedsDevice(std::vector<CellSeed>&);
void createTrackletsBuffers();
void createCellsBuffers(const int);
void createCellsDevice();
void createCellsLUTDevice();
Expand Down Expand Up @@ -93,7 +104,7 @@ class TimeFrameGPU : public TimeFrame
std::vector<std::vector<o2::MCCompLabel>>& getLabelsInChunks() { return mLabelsInChunks; }
int getNAllocatedROFs() const { return mNrof; } // Allocated means maximum nROF for each chunk while populated is the number of loaded ones.
StaticTrackingParameters<nLayers>* getDeviceTrackingParameters() { return mTrackingParamsDevice; }
Vertex* getDeviceVertices() { return mVerticesDevice; }
Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
int* getDeviceROFramesPV() { return mROFramesPVDevice; }
unsigned char* getDeviceUsedClusters(const int);
const o2::base::Propagator* getChainPropagator();
Expand All @@ -107,26 +118,32 @@ class TimeFrameGPU : public TimeFrame
const TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() const { return mTrackingFrameInfoDeviceArray; }
const Cluster** getDeviceArrayClusters() const { return mClustersDeviceArray; }
const Cluster** getDeviceArrayUnsortedClusters() const { return mUnsortedClustersDeviceArray; }
const Tracklet** getDeviceArrayTracklets() const { return mTrackletsDeviceArray; }
const int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
const int** getDeviceArrayClustersIndexTables() const { return mClustersIndexTablesDeviceArray; }
std::vector<unsigned int> getClusterSizes();
const unsigned char** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
const int** getDeviceROframeClusters() const { return mROFrameClustersDeviceArray; }
Tracklet** getDeviceArrayTracklets() { return mTrackletsDeviceArray; }
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
CellSeed* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
uint8_t* getDeviceMultCutMask() { return mMultMaskDevice; }

void setDevicePropagator(const o2::base::PropagatorImpl<float>*) override;

// Host-specific getters
gsl::span<int> getHostNTracklets(const int chunkId);
gsl::span<int> getHostNCells(const int chunkId);
gsl::span<int, nLayers - 1> getNTracklets() { return mNTracklets; }
gsl::span<int, nLayers - 2> getNCells() { return mNCells; }

// Host-available device getters
gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }
gsl::span<int*> getDeviceCellLUTs() { return mCellsLUTDevice; }
gsl::span<Tracklet*> getDeviceTracklet() { return mTrackletsDevice; }
gsl::span<CellSeed*> getDeviceCells() { return mCellsDevice; }
gsl::span<int, nLayers - 2> getNCellsDevice() { return mNCells; }

private:
void allocMemAsync(void**, size_t, Stream*, bool); // Abstract owned and unowned memory allocations
Expand All @@ -136,31 +153,37 @@ class TimeFrameGPU : public TimeFrame
StaticTrackingParameters<nLayers> mStaticTrackingParams;

// Host-available device buffer sizes
std::array<int, nLayers - 1> mNTracklets;
std::array<int, nLayers - 2> mNCells;

// Device pointers
StaticTrackingParameters<nLayers>* mTrackingParamsDevice;
IndexTableUtils* mIndexTableUtilsDevice;
std::array<int*, nLayers> mROFramesClustersDevice;
std::array<unsigned char*, nLayers> mUsedClustersDevice;
Vertex* mVerticesDevice;
int* mROFramesPVDevice;

// Hybrid pref
uint8_t* mMultMaskDevice;
Vertex* mPrimaryVerticesDevice;
int* mROFramesPVDevice;
std::array<Cluster*, nLayers> mClustersDevice;
std::array<Cluster*, nLayers> mUnsortedClustersDevice;
std::array<int*, nLayers> mClustersIndexTablesDevice;
std::array<unsigned char*, nLayers> mUsedClustersDevice;
std::array<int*, nLayers> mROFramesClustersDevice;
const Cluster** mClustersDeviceArray;
const Cluster** mUnsortedClustersDeviceArray;
const int** mClustersIndexTablesDeviceArray;
const unsigned char** mUsedClustersDeviceArray;
const int** mROFrameClustersDeviceArray;
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
const Tracklet** mTrackletsDeviceArray;
const int** mTrackletsLUTDeviceArray;
std::array<int*, nLayers - 2> mTrackletsLUTDevice;
Tracklet** mTrackletsDeviceArray;
std::array<int*, nLayers - 1> mTrackletsLUTDevice;
std::array<int*, nLayers - 2> mCellsLUTDevice;
std::array<int*, nLayers - 3> mNeighboursLUTDevice;

int** mCellsLUTDeviceArray;
int** mNeighboursCellDeviceArray;
int** mNeighboursCellLUTDeviceArray;
int** mTrackletsLUTDeviceArray;
std::array<CellSeed*, nLayers - 2> mCellsDevice;
std::array<int*, nLayers - 2> mNeighboursIndexTablesDevice;
CellSeed* mTrackSeedsDevice;
Expand All @@ -186,10 +209,6 @@ class TimeFrameGPU : public TimeFrame
std::vector<std::vector<int>> mNVerticesInChunks;
std::vector<std::vector<o2::MCCompLabel>> mLabelsInChunks;

// Host memory used only in GPU tracking
std::vector<int> mHostNTracklets;
std::vector<int> mHostNCells;

// Temporary buffer for storing output tracks from GPU tracking
std::vector<TrackITSExt> mTrackITSExt;
};
Expand All @@ -215,6 +234,16 @@ inline int TimeFrameGPU<nLayers>::getNClustersInRofSpan(const int rofIdstart, co
{
return static_cast<int>(mROFramesClusters[layerId][(rofIdstart + rofSpanSize) < mROFramesClusters.size() ? rofIdstart + rofSpanSize : mROFramesClusters.size() - 1] - mROFramesClusters[layerId][rofIdstart]);
}

/// Returns one entry per element of mUnsortedClusters, each entry being
/// that element's size() narrowed to unsigned int.
template <int nLayers>
inline std::vector<unsigned int> TimeFrameGPU<nLayers>::getClusterSizes()
{
  std::vector<unsigned int> clusterCounts;
  clusterCounts.reserve(mUnsortedClusters.size());
  for (const auto& clustersOnLayer : mUnsortedClusters) {
    clusterCounts.push_back(static_cast<unsigned int>(clustersOnLayer.size()));
  }
  return clusterCounts;
}

} // namespace gpu
} // namespace its
} // namespace o2
Expand Down
71 changes: 67 additions & 4 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,74 @@ GPUg() void fitTrackSeedsKernel(
#endif
} // namespace gpu

// Declaration of the tracklet-counting handler, templated on the number of
// layers (default 7). Only the prototype is visible here; the definition
// lives elsewhere.
// - trackletsLUTs is the only non-const raw pointer argument and
//   trackletsLUTsHost is a span of mutable int*, so these are presumably the
//   outputs (per-layer look-up tables) -- TODO confirm against the definition.
// - nBlocks / nThreads are presumably the kernel launch configuration --
//   TODO confirm.
// NOTE(review): nClusters is taken by value, so every call copies the vector.
// Switching to a const reference would have to be done together with the
// definition and any explicit instantiations.
template <int nLayers = 7>
void countTrackletsInROFsHandler(const IndexTableUtils* utils,
const uint8_t* multMask,
const int startROF,
const int endROF,
const int maxROF,
const int deltaROF,
const int vertexId,
const Vertex* vertices,
const int* rofPV,
const int nVertices,
const Cluster** clusters,
std::vector<unsigned int> nClusters,
const int** ROFClusters,
const unsigned char** usedClusters,
const int** clustersIndexTables,
int** trackletsLUTs,
gsl::span<int*> trackletsLUTsHost,
const int iteration,
const float NSigmaCut,
std::vector<float>& phiCuts,
const float resolutionPV,
std::vector<float>& minR,
std::vector<float>& maxR,
std::vector<float>& resolutions,
std::vector<float>& radii,
std::vector<float>& mulScatAng,
const int nBlocks,
const int nThreads);

// Declaration of the tracklet-computing handler, templated on the number of
// layers (default 7). Only the prototype is visible here; the definition
// lives elsewhere.
// - The non-const arguments are tracklets / spanTracklets (tracklet buffers),
//   nTracklets (a span of int) and trackletsLUTs / trackletsLUTsHost
//   (look-up tables); presumably these are filled or updated by the call --
//   TODO confirm against the definition.
// - nBlocks / nThreads are presumably the kernel launch configuration --
//   TODO confirm.
// NOTE(review): nClusters is taken by value, so every call copies the vector.
// Switching to a const reference would have to be done together with the
// definition and any explicit instantiations.
template <int nLayers = 7>
void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
const uint8_t* multMask,
const int startROF,
const int endROF,
const int maxROF,
const int deltaROF,
const int vertexId,
const Vertex* vertices,
const int* rofPV,
const int nVertices,
const Cluster** clusters,
std::vector<unsigned int> nClusters,
const int** ROFClusters,
const unsigned char** usedClusters,
const int** clustersIndexTables,
Tracklet** tracklets,
gsl::span<Tracklet*> spanTracklets,
gsl::span<int> nTracklets,
int** trackletsLUTs,
gsl::span<int*> trackletsLUTsHost,
const int iteration,
const float NSigmaCut,
std::vector<float>& phiCuts,
const float resolutionPV,
std::vector<float>& minR,
std::vector<float>& maxR,
std::vector<float>& resolutions,
std::vector<float>& radii,
std::vector<float>& mulScatAng,
const int nBlocks,
const int nThreads);

void countCellsHandler(const Cluster** sortedClusters,
const Cluster** unsortedClusters,
const TrackingFrameInfo** tfInfo,
const Tracklet** tracklets,
const int** trackletsLUT,
Tracklet** tracklets,
int** trackletsLUT,
const int nTracklets,
const int layer,
CellSeed* cells,
Expand All @@ -70,8 +133,8 @@ void countCellsHandler(const Cluster** sortedClusters,
void computeCellsHandler(const Cluster** sortedClusters,
const Cluster** unsortedClusters,
const TrackingFrameInfo** tfInfo,
const Tracklet** tracklets,
const int** trackletsLUT,
Tracklet** tracklets,
int** trackletsLUT,
const int nTracklets,
const int layer,
CellSeed* cells,
Expand Down
43 changes: 43 additions & 0 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,49 @@ struct gpuPair {

namespace gpu
{
// Poor man's span: a bare (pointer, element count) pair with a handful of
// accessors. It never allocates or frees the pointed-to storage and performs
// no bounds checking, so callers must keep indices below size() and must not
// call front()/back() on an empty view.
template <typename T>
struct gpuSpan {
  using value_type = T;
  using ptr = T*;
  using ref = T&;

  // Construction: empty view, or view over `dim` elements starting at `data`.
  GPUd() gpuSpan() : _data{nullptr}, _size{0} {}
  GPUd() gpuSpan(ptr data, unsigned int dim) : _data{data}, _size{dim} {}

  // Capacity.
  GPUd() unsigned int size() const { return _size; }
  GPUd() bool empty() const { return !_size; }

  // Unchecked element access.
  GPUd() ref operator[](unsigned int idx) const { return *(_data + idx); }
  GPUd() ref front() const { return *_data; }
  GPUd() ref back() const { return *(_data + (_size - 1)); }

  // Raw-pointer iteration range [begin(), end()).
  GPUd() ptr begin() const { return _data; }
  GPUd() ptr end() const { return _data + _size; }

 protected:
  ptr _data;
  unsigned int _size;
};

// Read-only flavour of gpuSpan: same (pointer, element count) layout as the
// primary template, but every accessor hands out const references/pointers.
// Like the primary template it does no bounds checking; callers must keep
// indices below size() and must not call front()/back() on an empty view.
template <typename T>
struct gpuSpan<const T> {
  using value_type = T;
  using ptr = const T*;
  using ref = const T&;

  GPUd() gpuSpan() : _data(nullptr), _size(0) {}
  GPUd() gpuSpan(ptr data, unsigned int dim) : _data(data), _size(dim) {}
  // Implicit mutable -> const conversion. gpuSpan<T> is a different class
  // from gpuSpan<const T>, so its protected _data/_size are not accessible
  // from here; go through its public accessors instead.
  GPUd() gpuSpan(const gpuSpan<T>& other) : _data(other.begin()), _size(other.size()) {}
  GPUd() ref operator[](unsigned int idx) const { return _data[idx]; }
  GPUd() unsigned int size() const { return _size; }
  GPUd() bool empty() const { return _size == 0; }
  GPUd() ref front() const { return _data[0]; }
  GPUd() ref back() const { return _data[_size - 1]; }
  GPUd() ptr begin() const { return _data; }
  GPUd() ptr end() const { return _data + _size; }

 protected:
  ptr _data;
  unsigned int _size;
};

enum class Task {
Tracker = 0,
Expand Down
Loading

0 comments on commit 33b4212

Please sign in to comment.