-
Notifications
You must be signed in to change notification settings - Fork 106
/
mlu_op.h
10198 lines (9996 loc) · 377 KB
/
mlu_op.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*************************************************************************
* Copyright (C) [2022] by Cambricon, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#ifndef MLUOP_EXAMPLE_H_
#define MLUOP_EXAMPLE_H_
/******************************************************************************
* MLUOPS: Cambricon Open Source operator library for Network
******************************************************************************/
/*! Version of this header in major.minor.patchlevel form (0.5.302). */
#define MLUOP_MAJOR 0
#define MLUOP_MINOR 5
#define MLUOP_PATCHLEVEL 302
/* NOTE(review): presumably the maximum number of tensor dimensions supported by
* MLUOP -- confirm against the tensor descriptor implementation. */
#define MLUOP_DIM_MAX 8
#include <stdint.h>
#include "cnrt.h"
/* Calling-convention decoration placed on MLUOP API declarations. It expands to
* __stdcall when _WIN32 is defined and to nothing otherwise. Defining MLUOP_WIN_API
* before including this header overrides both defaults. */
#ifndef MLUOP_WIN_API
#ifdef _WIN32
#define MLUOP_WIN_API __stdcall
#else
#define MLUOP_WIN_API
#endif
#endif
#if defined(__cplusplus)
extern "C" {
#endif
/******************************************************************************
* MLUOP Return Status
******************************************************************************/
/*! @brief Describes the status codes returned by MLUOP functions.
*/
typedef enum {
MLUOP_STATUS_SUCCESS = 0, /*!< The operation is successfully completed. */
MLUOP_STATUS_NOT_INITIALIZED = 1,
/*!< The MLUOP library is not initialized properly, which is usually caused by failing
to call ::mluOpCreate, ::mluOpCreateTensorDescriptor or ::mluOpSetTensorDescriptor.
Such an error is usually due to an incompatible MLU device or an invalid driver environment.
Note that ::mluOpCreate should be called prior to any other MLUOP function. */
MLUOP_STATUS_ALLOC_FAILED = 2,
/*!< This error occurs when the resource allocation fails, which is usually caused by
a failed call to cnMallocHost due to exceeded memory usage. Make sure that
the memory allocated previously is deallocated as much as possible. */
MLUOP_STATUS_BAD_PARAM = 3,
/*!< An invalid value or invalid parameters are passed to the function, including data type,
layout, dimensions, etc. */
MLUOP_STATUS_INTERNAL_ERROR = 4,
/*!< An error occurs inside the function, which may indicate an internal error or a bug in
the library. This error is usually caused by a failed call to cnrtMemcpyAsync.
Check whether the memory passed to the function is deallocated before the completion
of the routine. */
MLUOP_STATUS_ARCH_MISMATCH = 5,
/*!< The MLU device is invalid, that is, it is not supported by the current function. */
MLUOP_STATUS_EXECUTION_FAILED = 6,
/*!< An error occurs when the function fails to be executed on the MLU device, which may have
multiple causes. Check whether the hardware environment, driver version and other
prerequisite libraries are correctly installed. */
MLUOP_STATUS_NOT_SUPPORTED = 7,
/*!< An error occurs when the requested functionality is not supported in this version but would
be supported in the future. */
MLUOP_STATUS_NUMERICAL_OVERFLOW = 8,
/*!< A numerical overflow occurs when executing the function, which is usually due to a large
scale or an inappropriate value range of the input tensor. */
} mluOpStatus_t;
/******************************************************************************
* MLUOP Tensor Layout
******************************************************************************/
/*!
* @brief Describes the data layouts in MLUOP.
*
* The named layouts below describe data with two to five dimensions;
* ::MLUOP_LAYOUT_ARRAY describes a generic multi-dimensional tensor.
*
* Take images for example, the format of the data layout can be NCHW:
* - N: The number of images
* - C: The number of image channels
* - H: The height of images
* - W: The width of images
*
* Take sequence for example, the format of the data layout can be TNC:
* - T: The timing steps of sequence
* - N: The batch size of sequence
* - C: The alphabet size of sequence
*/
typedef enum {
MLUOP_LAYOUT_NCHW = 0,
/*!< The data layout is in the following order: batch size, channel, height, and width. */
MLUOP_LAYOUT_NHWC = 1,
/*!< The data layout is in the following order: batch size, height, width, and channel. */
MLUOP_LAYOUT_HWCN = 2,
/*!< The data layout is in the following order: height, width, channel, and batch size. */
MLUOP_LAYOUT_NDHWC = 3,
/*!< The data layout is in the following order: batch size, depth, height, width, and
* channel. */
MLUOP_LAYOUT_ARRAY = 4,
/*!< The data is a multi-dimensional tensor. */
MLUOP_LAYOUT_NCDHW = 5,
/*!< The data layout is in the following order: batch size, channel, depth, height, and
* width. */
MLUOP_LAYOUT_TNC = 6,
/*!< The data layout is in the following order: timing steps, batch size, and alphabet size. */
MLUOP_LAYOUT_NTC = 7,
/*!< The data layout is in the following order: batch size, timing steps, and alphabet size. */
MLUOP_LAYOUT_NC = 8,
/*!< The data layout is in the following order: batch size and channel. */
MLUOP_LAYOUT_NLC = 9,
/*!< The data layout is in the following order: batch size, width, and channel. */
} mluOpTensorLayout_t;
/******************************************************************************
* MLUOP Data Type
******************************************************************************/
/*! @brief Describes the data types in MLUOP.
*
* The enumerators are grouped by kind, so their numeric values are not in ascending
* declaration order. The values 5 and 10 are not assigned in this enumeration.
*/
typedef enum {
MLUOP_DTYPE_INVALID = 0, /*!< An invalid data type. */
MLUOP_DTYPE_HALF = 1, /*!< A 16-bit floating-point data type. */
MLUOP_DTYPE_FLOAT = 2, /*!< A 32-bit floating-point data type. */
MLUOP_DTYPE_DOUBLE = 14, /*!< A 64-bit floating-point data type. */
MLUOP_DTYPE_INT8 = 3, /*!< An 8-bit signed integer data type. */
MLUOP_DTYPE_INT16 = 4, /*!< A 16-bit signed integer data type. */
MLUOP_DTYPE_INT32 = 6, /*!< A 32-bit signed integer data type. */
MLUOP_DTYPE_INT64 = 9, /*!< A 64-bit signed integer data type. */
MLUOP_DTYPE_UINT8 = 7, /*!< An 8-bit unsigned integer data type. */
MLUOP_DTYPE_UINT16 = 13, /*!< A 16-bit unsigned integer data type. */
MLUOP_DTYPE_UINT32 = 11, /*!< A 32-bit unsigned integer data type. */
MLUOP_DTYPE_UINT64 = 12, /*!< A 64-bit unsigned integer data type. */
MLUOP_DTYPE_BOOL = 8, /*!< A boolean data type. */
MLUOP_DTYPE_COMPLEX_HALF = 15, /*!< A 32-bit complex number composed of two fp16 values. */
MLUOP_DTYPE_COMPLEX_FLOAT = 16, /*!< A 64-bit complex number composed of two fp32 values. */
} mluOpDataType_t;
/*!
* @brief Describes whether NaN values are propagated.
*/
typedef enum {
MLUOP_NOT_PROPAGATE_NAN = 0, /*!< The NaN numbers are not propagated. */
MLUOP_PROPAGATE_NAN = 1, /*!< The NaN numbers are propagated. */
} mluOpNanPropagation_t;
/*!
* @brief Describes the options that help choose the best-suited algorithm for the
* implementation of the activation and accumulation operations.
*/
typedef enum {
MLUOP_COMPUTATION_FAST = 0,
/*!< Implementation with the fastest algorithm and lower precision. */
MLUOP_COMPUTATION_HIGH_PRECISION = 1,
/*!< Implementation with the high-precision algorithm regardless of the performance. */
} mluOpComputationPreference_t;
/*!
* @brief Describes the atomics modes in MLUOP.
*
* Note that the enumerator values start at 1, not 0.
*/
typedef enum {
MLUOP_ATOMICS_NOT_ALLOWED = 1,
/*!< Atomics are not allowed to be used to accumulate results. */
MLUOP_ATOMICS_ALLOWED = 2,
/*!< Atomics are allowed to be used to accumulate results. */
} mluOpAtomicsMode_t;
/*!
* @brief Describes the rounding modes used in quantization conversion.
*/
typedef enum {
MLUOP_ROUND_HALF_TO_EVEN = 0,
/*!< The rounding mode that rounds half towards the nearest even neighbor is used for
* quantization conversion. */
MLUOP_ROUND_HALF_UP = 1,
/*!< The rounding mode that rounds half up towards the nearest neighbor is used for
* quantization conversion. */
MLUOP_ROUND_HALF_OFF_ZERO = 2,
/*!< The rounding mode that rounds half away from zero is used for quantization
* conversion. */
} mluOpQuantizeRoundMode_t;
/*!
* @brief Describes the quantization methods, which differ in the factors they use.
*/
typedef enum {
MLUOP_QUANTIZE_POSITION = 0,
/*!< Quantization method with a position factor and without a scale factor. */
MLUOP_QUANTIZE_POSITION_SCALE = 1,
/*!< Quantization method with position and scale factors. */
MLUOP_QUANTIZE_POSITION_SCALE_OFFSET = 2,
/*!< Asymmetric quantization method with position, scale, and offset factors. */
} mluOpQuantizeMode_t;
/*!
* @brief Describes the logarithm bases that are used in the implementation of the log function.
*/
typedef enum {
MLUOP_LOG_E = 0, /*!< The base e is used. */
MLUOP_LOG_2 = 1, /*!< The base 2 is used. */
MLUOP_LOG_10 = 2, /*!< The base 10 is used. */
} mluOpLogBase_t;
/*!
* @brief Describes the pointer modes that are used in the implementation of the fill function.
* The mode tells whether values passed by reference reside on the host or on the device.
*/
typedef enum {
MLUOP_POINTER_MODE_HOST = 0,
/*!< A host pointer, which means that the values passed by reference are on the host. */
MLUOP_POINTER_MODE_DEVICE = 1,
/*!< A device pointer, which means that the values passed by reference are on the device. */
} mluOpPointerMode_t;
/******************************************************************************
* MLUOP Data Structure: Customized Operation
******************************************************************************/
/*!
* @brief Describes the attributes of the matrix multiplication computation.
*/
typedef enum {
MLUOP_MATMUL_DESC_COMPUTE_TYPE = 0,
/*!< Defines the data type used for multiplication and accumulation operations, and the
* accumulator for implementing matrix multiplication. It must be set before
* doing matrix multiplication. */
MLUOP_MATMUL_DESC_SCALE_TYPE = 1,
/*!< Defines the data type of the scaling factors \b alpha and \b beta. The default value
* is the same as ::MLUOP_MATMUL_DESC_COMPUTE_TYPE. It is not supported now. */
MLUOP_MATMUL_DESC_POINTER_MODE = 2,
/*!< Specifies whether \b alpha and \b beta are stored on the host or on the device.
* It is not supported now. */
MLUOP_MATMUL_DESC_TRANSA = 3,
/*!< Specifies whether the transpose should be performed on matrix A. The default value is
* 0 (false). */
MLUOP_MATMUL_DESC_TRANSB = 4,
/*!< Specifies whether the transpose should be performed on matrix B. The default value is
* 0 (false). */
MLUOP_MATMUL_DESC_TRANSC = 5,
/*!< Specifies whether the transpose should be performed on matrix C. The default value is
* 0 (false). It is not supported now. */
MLUOP_MATMUL_DESC_EPILOGUE = 6,
/*!< Specifies the epilogue function. It is not supported now. */
MLUOP_MATMUL_DESC_BIAS_POINTER = 7,
/*!< Pointer to the bias vector in MLU device memory. Currently, it is only supported to set
* the attribute \b matmul_desc. */
MLUOP_MATMUL_DESC_EPILOGUE_TYPE = 8,
/*!< Specifies the epilogue fusion type of the matrix multiplication. */
MLUOP_MATMUL_DESC_EPILOGUE_OPERAND = 9,
/*!< Specifies the epilogue fusion operand of the matrix multiplication. */
MLUOP_MATMUL_ALLOW_TF32 = 10,
/*!< Determines whether to enable TensorFloat-32 mode. TensorFloat-32 is enabled by
* default. */
MLUOP_MATMUL_USE_BETA = 11,
/*!< Specifies whether to use \b beta on matrix C. */
MLUOP_MATMUL_CAST_MODE = 12,
/*!< Specifies the quantization mode used for the matrix multiplication quantization. */
MLUOP_MATMUL_USE_STRIDE = 13,
/*!< Specifies whether stride should be performed on the tensor. */
} mluOpMatMulDescAttribute_t;
/*!
* @brief Describes the preferences for selecting a matrix multiplication algorithm.
*/
typedef enum {
MLUOP_MATMUL_FASTEST = 0,
/*!< The high-speed preference is used. */
MLUOP_MATMUL_LOW_MEMORY_OCCUPY = 1,
/*!< The low-memory preference is used. It is not supported now. */
} mluOpMatMulPreference_t;
/*!
* @brief Describes the output ordering modes that can be used to implement the unique
* operation.
*/
typedef enum {
MLUOP_UNSORT_FORWARD = 0,
/*!< Returns the data in the same order as the input data after eliminating the
* duplicated values. */
MLUOP_SORT_ASCEND = 1,
/*!< Returns the data sorted in ascending order by input value after eliminating
* the duplicated values. */
MLUOP_UNSORT_REVERSE = 2,
/*!< Returns the data in the reverse order of the input data after eliminating
* the duplicated values. */
} mluOpUniqueSort_t;
/*!
* @brief Describes the modes that are used in the implementation of the scatter_nd operation.
*/
typedef enum {
MLUOP_SCATTERND_ADD = 0,
/*!< The ADD (addition) operation is implemented. */
MLUOP_SCATTERND_SUB = 1,
/*!< The SUB (subtraction) operation is implemented. This mode is not supported currently. */
MLUOP_SCATTERND_MUL = 2,
/*!< The MUL (multiplication) operation is implemented. This mode is not supported currently. */
MLUOP_SCATTERND_UPDATE = 3,
/*!< The replacement operation is implemented. */
} mluOpScatterNdMode_t;
/*!
* @brief Describes the reduction operations that are used in the implementation of the
* Reduce function.
*/
typedef enum {
MLUOP_REDUCE_ADD = 0, /*!< The reduce addition operation is implemented. */
MLUOP_REDUCE_AVG = 1, /*!< The reduce average operation is implemented. */
MLUOP_REDUCE_MUL = 2, /*!< The reduce multiplication operation is implemented. */
MLUOP_REDUCE_MAX = 3, /*!< The reduce maximum operation is implemented. */
MLUOP_REDUCE_MIN = 4, /*!< The reduce minimum operation is implemented. */
MLUOP_REDUCE_AND = 5, /*!< The reduce AND operation is implemented. */
MLUOP_REDUCE_OR = 6, /*!< The reduce OR operation is implemented. */
MLUOP_REDUCE_NORM1 = 7, /*!< The sum of absolute values operation is implemented. */
MLUOP_REDUCE_NORM2 = 8, /*!< The square root of sum of squares operation is implemented. */
MLUOP_REDUCE_MAX_LAST_INDEX = 9,
/*!< The operation of returning the index of the last maximum value is implemented. */
MLUOP_REDUCE_MIN_LAST_INDEX = 10,
/*!< The operation of returning the index of the last minimum value is implemented. */
MLUOP_REDUCE_NORMP = 11, /*!< The 1/p power of the sum of p-th powers operation is implemented. */
MLUOP_REDUCE_ASUM = 12,
/*!< The sum of absolute values operation adapted to the Caffe framework is implemented. */
MLUOP_REDUCE_SUMSQ = 13,
/*!< The sum of the squared values operation adapted to the Caffe framework is implemented. */
} mluOpReduceOp_t;
/*!
* @brief Describes whether the indices are computed in the implementation of the Reduce
* function.
*/
typedef enum {
MLUOP_REDUCE_NO_INDICES = 0, /*!< The indices are not computed. */
MLUOP_REDUCE_FLATTENED_INDICES = 1, /*!< The indices and the corresponding values are computed. */
MLUOP_REDUCE_ONLY_INDICES = 2, /*!< Only the indices are computed. */
} mluOpReduceIndices_t;
/*!
* @brief Describes the data type of the indices used in the Reduce function.
*/
typedef enum {
MLUOP_32BIT_INDICES = 0, /*!< The data type of the indices is unsigned int. */
MLUOP_16BIT_INDICES = 1, /*!< The data type of the indices is unsigned short. */
} mluOpIndicesType_t;
/******************************************************************************
* MLUOP Runtime Management
******************************************************************************/
/*!
* @struct mluOpContext
* @brief Describes the MLUOP context. The struct is forward-declared here and is
* referred to through ::mluOpHandle_t.
*/
struct mluOpContext;
/*!
* A pointer to the ::mluOpContext struct that holds the MLUOP context.
*
* MLU device resources cannot be accessed directly, so MLUOP uses a
* handle to manage the MLUOP context, including MLU device information
* and queues.
*
* The MLUOP context is created with ::mluOpCreate and the returned
* handle should be passed to all the subsequent function calls.
* You need to destroy the MLUOP context at the end with ::mluOpDestroy.
*/
typedef struct mluOpContext *mluOpHandle_t;
/*!
* The descriptor of a collection of tensors that is used in the RNN operation, such as
* weight and bias.
*
* You need to call the ::mluOpCreateTensorSetDescriptor function to create a descriptor, and
* call the ::mluOpInitTensorSetMemberDescriptor function to set the information about each
* tensor in the tensor set. If the tensors in the tensor set are in fixed-point data type,
* call the ::mluOpInitTensorSetMemberDescriptorPositionAndScale function to set the
* quantization parameters.
* You need to destroy the descriptor at the end with the
* ::mluOpDestroyTensorSetDescriptor function.
*/
typedef struct mluOpTensorSetStruct *mluOpTensorSetDescriptor_t;
// Group:Runtime Management
/*!
* @brief Initializes the MLUOP library and creates a handle \b handle to a structure
* that holds the MLUOP library context. It allocates hardware resources on the host
* and device. You need to call this function before any other MLUOP function.
*
* You need to call the ::mluOpDestroy function to release the resources later.
*
* @param[out] handle
* Pointer to the handle of the MLUOP context that is used to manage MLU devices and
* queues. For detailed information, see ::mluOpHandle_t.
*
* @par Return
* - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - None.
*
* @par API Dependency
* - The ::mluOpDestroy function should be called later to release the resources
* allocated by this function.
*
* @par Note
* - None.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
mluOpStatus_t MLUOP_WIN_API
mluOpCreate(mluOpHandle_t *handle);
// Group:Runtime Management
/*!
* @brief Updates the MLUOP context information that is held by the \b handle. This function
* should be called if you call the CNDrv API cnSetCtxConfigParam to set the context
* information. The related context information will be synchronized to MLUOP with this
* function. For detailed information, see "Cambricon CNDrv Developer Guide".
*
* @param[in] handle
* Handle to an MLUOP context that is used to manage MLU devices. For detailed information,
* see ::mluOpHandle_t.
*
* @par Return
* - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - None.
*
* @par API Dependency
* - None.
*
* @par Note
* - None.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
mluOpStatus_t MLUOP_WIN_API
mluOpUpdateContextInformation(mluOpHandle_t handle);
// Group:Runtime Management
/*!
* @brief Releases the resources of the specified MLUOP handle \b handle that was
* created by the ::mluOpCreate function. It is usually the last MLUOP call made
* with the handle.
*
* @param[in] handle
* Handle to the MLUOP context to be destroyed. For detailed information,
* see ::mluOpHandle_t.
*
* @par Return
* - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - None.
*
* @par API Dependency
* - None.
*
* @par Note
* - None.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
mluOpStatus_t MLUOP_WIN_API
mluOpDestroy(mluOpHandle_t handle);
// Group:Runtime Management
/*!
* @brief Sets the runtime queue \b queue in the handle \b handle. The queue is used to
* launch kernels or to synchronize to this queue.
*
* Before setting a queue \b queue, you need to call the ::mluOpCreate function to initialize
* the MLUOP library, and call the cnrtCreateQueue function to create a queue \b queue.
*
* @param[in] handle
* Handle to an MLUOP context that is used to manage MLU devices and
* queues. For detailed information, see ::mluOpHandle_t.
* @param[in] queue
* The runtime queue to be set to the MLUOP handle.
*
* @par Return
* - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - None.
*
* @par API Dependency
* - The ::mluOpCreate function and the cnrtCreateQueue function need to be called
* before this function.
*
* @par Note
* - None.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
mluOpStatus_t MLUOP_WIN_API
mluOpSetQueue(mluOpHandle_t handle, cnrtQueue_t queue);
// Group:Runtime Management
/*!
* @brief Retrieves the queue \b queue that was previously set to the handle \b handle.
*
* @param[in] handle
* Handle to an MLUOP context that is used to manage MLU devices and queues. For
* detailed information, see ::mluOpHandle_t.
* @param[out] queue
* Pointer to the variable that receives the queue that was previously set to the
* specified handle.
*
* @par Return
* - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - None.
*
* @par API Dependency
* - None.
*
* @par Note
* - None.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
mluOpStatus_t MLUOP_WIN_API
mluOpGetQueue(mluOpHandle_t handle, cnrtQueue_t *queue);
// Group:Runtime Management
/*!
* @brief Converts the MLUOP enumerated status code to an ASCIIZ static string and returns
* a pointer to that string, which holds the status name. For example, when the input
* argument is ::MLUOP_STATUS_SUCCESS, the returned string is MLUOP_STATUS_SUCCESS. When an
* invalid status value is passed to the function, the returned string is
* MLUOP_STATUS_BAD_PARAM.
*
* @param[in] status
* The MLUOP enumerated status code.
*
* @par Return
* - A pointer to the ASCIIZ static string with the status name.
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - None.
*
* @par API Dependency
* - None.
*
* @par Note
* - None.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
const char *
mluOpGetErrorString(mluOpStatus_t status);
// Group:Tensor
/*!
* @brief Gets the size of a data type in ::mluOpDataType_t.
*
* @param[in] data_type
* The data type. For detailed information, see ::mluOpDataType_t.
* @param[out] size
* Host pointer to the variable that receives the size of the data type.
*
* @par Return
* - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - None.
*
* @par API Dependency
* - None.
*
* @par Note
* - None.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
mluOpStatus_t MLUOP_WIN_API
mluOpGetSizeOfDataType(mluOpDataType_t data_type, size_t *size);
// Group:Version Management
/*!
* @brief Retrieves the version of the MLUOP library. The version of MLUOP
* is composed of \b major, \b minor and \b patch. For instance, if major = 1,
* minor = 2, and patch = 3, the version of the MLUOP library is 1.2.3.
*
* @param[out] major
* A pointer to the integer that receives the major version of the MLUOP library.
* @param[out] minor
* A pointer to the integer that receives the minor version of the MLUOP library.
* @param[out] patch
* A pointer to the integer that receives the patch version of the MLUOP library.
*
* @par Return
* - None.
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - None.
*
* @par API Dependency
* - None.
*
* @par Note
* - None.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
void
mluOpGetLibVersion(int *major, int *minor, int *patch);
// Group:QuantizeRoundMode
/*!
* @brief Updates the rounding mode in the MLUOP context information that is held by the
* \b handle. This function should be called if you want to change the MLUOP rounding mode
* that is used to accumulate the results. For detailed information, see "Cambricon CNDrv
* Developer Guide".
*
* @param[in] handle
* Handle to an MLUOP context that is used to manage MLU devices and queues. For detailed
* information, see ::mluOpHandle_t.
* @param[in] round_mode
* The rounding mode of quantization conversion to be set to the MLUOP handle. For detailed
* information, see ::mluOpQuantizeRoundMode_t.
*
* @par Return
* - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - None.
*
* @par API Dependency
* - None.
*
* @par Note
* - On MLU200 series:
* You cannot set ::MLUOP_ROUND_HALF_TO_EVEN for the rounding mode because the hardware does
* not support it.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
mluOpStatus_t MLUOP_WIN_API
mluOpSetQuantizeRoundMode(mluOpHandle_t handle, mluOpQuantizeRoundMode_t round_mode);
// Group:QuantizeRoundMode
/*!
* @brief Retrieves the rounding mode of a specific MLUOP context.
*
* @param[in] handle
* Handle to an MLUOP context that is used to manage MLU devices and queues. For detailed
* information, see ::mluOpHandle_t.
* @param[out] round_mode
* Pointer to the variable that receives the rounding mode of quantization conversion that
* was previously set to the specified handle.
*
* @par Return
* - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - None.
*
* @par API Dependency
* - None.
*
* @par Note
* - The rounding mode of an initialized ::mluOpHandle_t is ::MLUOP_ROUND_HALF_TO_EVEN.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
mluOpStatus_t MLUOP_WIN_API
mluOpGetQuantizeRoundMode(mluOpHandle_t handle, mluOpQuantizeRoundMode_t *round_mode);
// Group:Runtime Management
/*!
* @brief Updates the atomics mode in the MLUOP context information that is held by the
* \b handle. This function should be called if you want to change the atomics mode that is
* used to accumulate the results. For detailed information, see "Cambricon CNDrv Developer
* Guide".
*
* @param[in] handle
* Handle to an MLUOP context that is used to manage MLU devices and queues. For detailed
* information, see ::mluOpHandle_t.
* @param[in] atomics_mode
* The atomics mode to be set. For detailed information, see ::mluOpAtomicsMode_t.
*
* @par Return
* - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - None.
*
* @par API Dependency
* - None.
*
* @par Note
* - None.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
mluOpStatus_t MLUOP_WIN_API
mluOpSetAtomicsMode(mluOpHandle_t handle, mluOpAtomicsMode_t atomics_mode);
// Group:Runtime Management
/*!
* @brief Retrieves the atomics mode of a specific MLUOP context.
*
* @param[in] handle
* Handle to an MLUOP context that is used to manage MLU devices and queues. For
* detailed information, see ::mluOpHandle_t.
* @param[out] atomics_mode
* Pointer to the variable that receives the atomics mode. For detailed information,
* see ::mluOpAtomicsMode_t.
*
* @par Return
* - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - None.
*
* @par API Dependency
* - None.
*
* @par Note
* - The atomics mode of a default-initialized ::mluOpHandle_t is ::MLUOP_ATOMICS_NOT_ALLOWED.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
mluOpStatus_t MLUOP_WIN_API
mluOpGetAtomicsMode(mluOpHandle_t handle, mluOpAtomicsMode_t *atomics_mode);
/******************************************************************************
* MLUOP Data Structure: Descriptor
* The structs represent nodes, weights and AI network layers
******************************************************************************/
/*!
* The descriptor of a tensor that holds the information including tensor
* layout, data type, the number of dimensions, shape and strides.
*
* You need to call the ::mluOpCreateTensorDescriptor function to create a descriptor,
* and call the ::mluOpSetTensorDescriptor function or the ::mluOpSetTensorDescriptorEx
* function to set the tensor information to the descriptor. Also, you need to destroy
* the descriptor at the end with the ::mluOpDestroyTensorDescriptor function.
*/
typedef struct mluOpTensorStruct *mluOpTensorDescriptor_t;
/*!
* The descriptor of the matrix multiplication function that holds compute type, bias type,
* transpose flag, and other attributes defined in ::mluOpMatMulDescAttribute_t.
*
* You need to call the ::mluOpMatMulDescCreate function to create a descriptor, and call
* the ::mluOpSetMatMulDescAttr function to set the information of the matrix multiplication
* to the descriptor. Also, you need to destroy the descriptor at the end with
* the ::mluOpMatMulDescDestroy function.
*/
typedef struct mluOpMatMulStruct *mluOpMatMulDescriptor_t;
/*!
* The descriptor of the sparse convolution that holds the information including tensor
* shape, the number of dimensions, pad, strides, dilation, sub_m, and transpose.
*
* You need to call the ::mluOpCreateSparseConvolutionDescriptor function to create a descriptor,
* and call the ::mluOpSetSparseConvolutionDescriptor function to set the information to
* the descriptor. Also, you need to destroy the descriptor at the end with
* the ::mluOpDestroySparseConvolutionDescriptor function.
*/
typedef struct mluOpSparseConvolutionStruct *mluOpSparseConvolutionDescriptor_t;
/*!
* The descriptor of the matrix multiplication that holds the configured matrix multiplication
* algorithm descriptor and its runtime properties.
*
* You need to call the ::mluOpCreateMatMulHeuristicResult function to create a descriptor.
* Also, you need to destroy the descriptor at the end with
* the ::mluOpDestroyMatMulHeuristicResult function.
*/
typedef struct mluOpMatMulHeuristicResult *mluOpMatMulHeuristicResult_t;
/*!
* The descriptor of the matrix multiplication that holds the preferences for the
* ::mluOpMatMulHeuristicResult_t configuration.
*/
typedef struct mluOpMatMulPrefer *mluOpMatMulPrefer_t;
/*!
* The descriptor of the matrix multiplication computation algorithm.
*
* You need to call the ::mluOpMatMulAlgoCreate function to create a descriptor.
* Also, you need to destroy the descriptor at the end with
* the ::mluOpMatMulAlgoDestroy function.
*/
typedef struct mluOpMatMulAlgoStruct *mluOpMatMulAlgo_t;
/*!
* The descriptor of the Reduce function that holds ::mluOpReduceOp_t,
* ::mluOpDataType_t, ::mluOpNanPropagation_t, ::mluOpReduceIndices_t, and ::mluOpIndicesType_t.
*/
typedef struct mluOpReduceStruct *mluOpReduceDescriptor_t;
/*!
* The descriptor of the transpose operation that holds transpose information
* including \b dimensions and \b permute.
*
* You need to call the ::mluOpCreateTransposeDescriptor function to create a descriptor,
* and call the ::mluOpSetTransposeDescriptor function to set the information of the
* transpose operation to the descriptor. Also, you need to destroy the descriptor
* at the end with the ::mluOpDestroyTransposeDescriptor function.
*/
typedef struct mluOpTransposeStruct *mluOpTransposeDescriptor_t;
/*!
* The descriptor of the Unique function that holds ::mluOpUniqueSort_t, dim, return_inverse,
* and return_counts.
*
* You need to call the ::mluOpCreateUniqueDescriptor function to create a descriptor,
* and call the ::mluOpSetUniqueDescriptor function to set the information of the unique
* operation to the descriptor. Also, you need to destroy the descriptor at the end with the
* ::mluOpDestroyUniqueDescriptor function.
*/
typedef struct mluOpUniqueStruct *mluOpUniqueDescriptor_t;
/*!
* The descriptor of the CARAFE (Content-Aware ReAssembly of FEatures) operation that holds
* CARAFE information including the number of input dimensions, kernel size, group size,
* and scale factor.
*
* You need to call the ::mluOpCreateCarafeDescriptor function to create a descriptor,
* and call the ::mluOpSetCarafeDescriptor function to set the information of the CARAFE operation
* to the descriptor. Also, you need to destroy the descriptor at the end with the
* ::mluOpDestroyCarafeDescriptor function.
*/
typedef struct mluOpCarafeStruct *mluOpCarafeDescriptor_t;
// Group:Tensor
/*!
* @brief Creates a tensor descriptor pointed to by \b desc that holds the dimensions, data type,
* and layout of the input tensor. If the input tensor is in fixed-point data type, the
* ::mluOpSetTensorDescriptorPositionAndScale function or the ::mluOpSetTensorDescriptorPosition
* function needs to be called to set the quantization parameters.
*
* The ::mluOpDestroyTensorDescriptor function needs to be called to destroy the tensor descriptor
* later.
*
* @param[out] desc
* Pointer to the tensor descriptor to be created. For detailed information,
* see ::mluOpTensorDescriptor_t.
*
* @par Return
* - ::MLUOP_STATUS_SUCCESS, ::MLUOP_STATUS_BAD_PARAM
*
* @par Data Type
* - None.
*
* @par Data Layout
* - None.
*
* @par Scale Limitation
* - None.
*
* @par API Dependency
* - None.
*
* @par Note
* - None.
*
* @par Example
* - None.
*
* @par Reference
* - None.
*/
mluOpStatus_t MLUOP_WIN_API
mluOpCreateTensorDescriptor(mluOpTensorDescriptor_t *desc);
// Group:GetIndicePairs
/*!
* @brief Creates a tensor descriptor pointed by \b desc that holds the dimensions, pad, stride,
* dilation, sub_m, transpose, inverse and layout of input filter and output tensor shape. The
* ::mluOpSetSparseConvolutionDescriptor function needs to be called to set parameters.
*
* The ::mluOpDestroySparseConvolutionDescriptor function needs to be called to destroy the
* tensor descriptor later.