Skip to content

Commit

Permalink
[Fix](mlu-ops): fix mem-leak caused by tensor secondary memory manage…
Browse files Browse the repository at this point in the history
…ment (#1095)

修正全局变量执行 tensor二次内存管理导致的内存检测工具报内存泄露故障 
Co-authored-by: nizhijie <[email protected]>
  • Loading branch information
nth-BYTE authored Sep 30, 2024
1 parent c6d7b45 commit c0026e3
Showing 1 changed file with 46 additions and 27 deletions.
73 changes: 46 additions & 27 deletions core/tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,27 +310,46 @@ struct mluOpTensorDescriptorQueueStruct {
extend_num *= 2;
}

// Let the OS do the cleanup since it's a global variable
~mluOpTensorDescriptorQueueStruct() {}
// Release every block recorded in `headers`; extend() registers the blocks
// it allocates there, so this returns all pool storage at destruction time.
~mluOpTensorDescriptorQueueStruct() {
  for (auto it = headers.begin(); it != headers.end(); ++it) {
    free(*it);
  }
}

// Spin until the flag is acquired. test_and_set() returns the previous
// value, so a `false` result means this caller is the one that set it.
inline void lock() {
  for (;;) {
    if (!flag.test_and_set(std::memory_order_acquire)) {
      return;
    }
  }
}
// Clear the flag with release ordering so a caller spinning in lock() can
// acquire it.
inline void unlock() {
  flag.clear(std::memory_order_release);
}
inline void extend(size_t n) {
mluOpTensorStruct *header = new (std::nothrow) mluOpTensorStruct[n];
mluOpTensorStruct *header =
(mluOpTensorStruct *)malloc(sizeof(mluOpTensorStruct) * n);
for (size_t i = 0; i < n; ++i) {
mluOpTensorStruct *desc = header + i;
queue.push_front(desc);
queue.push_front(header + i);
}
headers.push_back(header);
}
size_t extend_num = 128;
std::deque<mluOpTensorDescriptor_t> queue;
std::vector<mluOpTensorStruct *> headers;
std::atomic_flag flag = ATOMIC_FLAG_INIT;
};

static mluOpTensorDescriptorQueueStruct queue_array;
// Global descriptor pool. It starts out null, is heap-allocated by
// mluOpInit() and is deleted by mluOpExit().
mluOpTensorDescriptorQueueStruct *queue_array = nullptr;

// Creates the global descriptor pool if it does not exist yet.
// The allocation uses the non-throwing form of new, so `queue_array` remains
// null when it fails.
// NOTE(review): presumably invoked automatically at library load through
// MLUOP_ATTRIBUTE_CONSTRUCTOR; the macro is defined elsewhere, please confirm.
MLUOP_ATTRIBUTE_CONSTRUCTOR MLUOP_ATTRIBUTE_VISIBILITY_HIDDEN void mluOpInit() {
  if (queue_array != nullptr) {
    return;
  }
  queue_array = new (std::nothrow) mluOpTensorDescriptorQueueStruct;
}

// Destroys the global descriptor pool; its destructor releases the blocks it
// recorded.
// NOTE(review): presumably invoked automatically at library unload through
// MLUOP_ATTRIBUTE_DESTRUCTOR; the macro is defined elsewhere, please confirm.
MLUOP_ATTRIBUTE_DESTRUCTOR MLUOP_ATTRIBUTE_VISIBILITY_HIDDEN void mluOpExit() {
  // Deleting a null pointer is a no-op, so no guard is needed.
  delete queue_array;
  // Reset the pointer so a repeated call, or a later mluOpInit(), sees null
  // instead of a dangling address.
  queue_array = nullptr;
}

#endif
} // anonymous namespace

Expand All @@ -340,14 +359,14 @@ mluOpCreateTensorDescriptor(mluOpTensorDescriptor_t *desc) {
PARAM_CHECK("[mluOpCreateTensorDescriptor]", desc != NULL);

#if MLUOP_TENSOR_QUEUE_ENABLE
queue_array.lock();
if MLUOP_PREDICT_FALSE (queue_array.queue.empty()) {
queue_array.extend(queue_array.extend_num);
queue_array.extend_num *= 2;
queue_array->lock();
if MLUOP_PREDICT_FALSE (queue_array->queue.empty()) {
queue_array->extend(queue_array->extend_num);
queue_array->extend_num *= 2;
}
*desc = ::new (queue_array.queue.front()) mluOpTensorStruct;
queue_array.queue.pop_front();
queue_array.unlock();
*desc = ::new (queue_array->queue.front()) mluOpTensorStruct;
queue_array->queue.pop_front();
queue_array->unlock();
#else
mluOpTensorStruct *ts = new (std::nothrow) mluOpTensorStruct;
*desc = ts;
Expand All @@ -362,17 +381,17 @@ mluOpStatus_t MLUOP_WIN_API mluOpCreateGroupTensorDescriptors(
PARAM_CHECK("[mluOpCreateGroupTensorDescriptors]", desc_num > 0);

#if MLUOP_TENSOR_QUEUE_ENABLE
queue_array.lock();
if MLUOP_PREDICT_FALSE (queue_array.queue.size() < desc_num) {
queue_array.extend(std::max(queue_array.extend_num, (size_t)desc_num));
queue_array.extend_num =
2 * std::max(queue_array.extend_num, (size_t)desc_num);
queue_array->lock();
if MLUOP_PREDICT_FALSE (queue_array->queue.size() < desc_num) {
queue_array->extend(std::max(queue_array->extend_num, (size_t)desc_num));
queue_array->extend_num =
2 * std::max(queue_array->extend_num, (size_t)desc_num);
}
for (int i = 0; i < desc_num; ++i) {
*(group_desc[i]) = queue_array.queue.front();
queue_array.queue.pop_front();
*(group_desc[i]) = queue_array->queue.front();
queue_array->queue.pop_front();
}
queue_array.unlock();
queue_array->unlock();
#else
for (int i = 0; i < desc_num; ++i) {
mluOpTensorStruct *ts = new (std::nothrow) mluOpTensorStruct;
Expand Down Expand Up @@ -893,10 +912,10 @@ mluOpDestroyTensorDescriptor(mluOpTensorDescriptor_t desc) {
PARAM_CHECK("[mluOpDestroyTensorDescriptor]", desc != NULL);

#if MLUOP_TENSOR_QUEUE_ENABLE
queue_array.lock();
queue_array->lock();
desc->~mluOpTensorStruct();
queue_array.queue.push_front(desc);
queue_array.unlock();
queue_array->queue.push_front(desc);
queue_array->unlock();
#else
delete desc;
#endif
Expand All @@ -910,12 +929,12 @@ mluOpStatus_t MLUOP_WIN_API mluOpDestroyGroupTensorDescriptors(
PARAM_CHECK("[mluOpDestroyGroupTensorDescriptors]", desc_num > 0);

#if MLUOP_TENSOR_QUEUE_ENABLE
queue_array.lock();
queue_array->lock();
for (int i = 0; i < desc_num; ++i) {
group_desc[i][0]->~mluOpTensorStruct();
queue_array.queue.push_front(group_desc[i][0]);
queue_array->queue.push_front(group_desc[i][0]);
}
queue_array.unlock();
queue_array->unlock();
#else
for (int i = 0; i < desc_num; ++i) {
delete group_desc[i][0];
Expand Down

0 comments on commit c0026e3

Please sign in to comment.