forked from NVIDIA/nvvl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
PictureSequence.h
562 lines (493 loc) · 14.5 KB
/
PictureSequence.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
#pragma once
#ifndef CFFI
# include <stddef.h>
# include <stdint.h>
# include <cuda_runtime.h>
#else
struct CUstream_st;
typedef struct CUstream_st* cudaStream_t;
#endif
#ifdef __cplusplus
extern "C" {
#else
#include <stdbool.h>
#endif
/**
* How the image is scaled up/down from the original.
*
* Selected per layer through NVVL_LayerDesc::scale_method.
*/
enum NVVL_ScaleMethod {
/**
* Nearest neighbor: the value of the nearest source pixel is used,
* no interpolation is performed.
*/
ScaleMethod_Nearest,
/**
* Simple bilinear interpolation of the four nearest neighbors.
*/
ScaleMethod_Linear
// These are possibilities but currently unimplemented (PRs welcome)
// ScaleMethod_Area
// ScaleMethod_Cubic
// ScaleMethod_Lanczos
};
/**
* How the chroma channels are upscaled from YUV 4:2:0 to 4:4:4.
*
* Selected per layer through NVVL_LayerDesc::chroma_up_method.
* Only one method is currently declared.
*/
enum NVVL_ChromaUpMethod {
/**
* Simple bilinear interpolation of the four nearest neighbors.
*/
ChromaUpMethod_Linear
// These are possibilities but currently unimplemented (PRs welcome)
// ChromaUpMethod_CatmullRom
};
/**
* Color space to return the picture in.
*
* Selected per layer through NVVL_LayerDesc::color_space.
*/
enum NVVL_ColorSpace {
/**
* RGB. Channel 0 is red, 1 is green, 2 is blue.
*/
ColorSpace_RGB,
/**
* YCbCr. Channel 0 is luma, 1 is blue-difference chroma, 2 is
* red-difference chroma.
*/
ColorSpace_YCbCr
// Possible future addition, currently not declared:
//ColorSpace_YCgCo
};
/**
* Different types of data that a layer in a PictureSequence can hold,
* used for the C interface.
*
* Stored in NVVL_PicLayer::type to describe what NVVL_PicLayer::data
* points to, and passed to nvvl_get_layer() / nvvl_get_layer_indexed().
*
* NOTE(review): the concrete element type behind each enumerator is not
* visible in this header; the descriptions below are assumptions based
* on the names and should be confirmed against the implementation.
*/
enum NVVL_PicDataType {
// No type / unset (presumably marks an invalid or empty layer).
PDT_NONE,
// Presumably 8-bit unsigned elements.
PDT_BYTE,
// Presumably 16-bit half-precision floating point elements.
PDT_HALF,
// Presumably 32-bit single-precision floating point elements.
PDT_FLOAT
};
/**
* Description of a layer in a frame sequence.
*
* Plain C struct shared between the C interface (embedded in
* NVVL_PicLayer) and the C++ interface (embedded in
* NVVL::PictureSequence::Layer). It describes geometry, color handling
* and memory layout, but does not hold the data pointer itself.
*/
struct NVVL_LayerDesc {
/**
* Number of frames in this layer.
*
* This does not need to be the same as the number of frames in the sequence.
*/
uint16_t count;
/**
* Number of color channels in this layer.
*
* Must match the number of channels in the color space requested.
*/
uint8_t channels;
/**
* Width of this layer.
*
* Each frame is cropped to this size after scaling.
*/
uint16_t width;
/**
* Height of this layer.
*
* Each frame is cropped to this size after scaling.
*/
uint16_t height;
/**
* Horizontal location of the crop within the scaled frame.
*
* Must be set such that crop_x + width <= sequence_width
*
* NOTE(review): "sequence_width" is not defined anywhere in this
* header; presumably it is the width of the frame after scaling
* (scale_width, or the source width when no scaling is done) - confirm.
*/
uint16_t crop_x;
/**
* Vertical location of the crop within the scaled frame.
*
* Must be set such that crop_y + height <= sequence_height
*
* NOTE(review): "sequence_height" is not defined anywhere in this
* header; presumably it is the height of the frame after scaling - confirm.
*/
uint16_t crop_y;
/**
* Width the original frame is scaled to before cropping.
*
* If left 0, no scaling is performed.
*/
uint16_t scale_width;
/**
* Height the original frame is scaled to before cropping.
*
* If left 0, no scaling is performed.
*/
uint16_t scale_height;
/**
* Indicates if the frame should be flipped horizontally before scaling.
*/
bool horiz_flip;
/**
* Indicates if the pixel values should be normalized to [0,1].
*
* If false, pixels will be in the standard [0,255] range. If
* true, T must be a floating point type, where T is the element
* type of the layer (the template argument of
* NVVL::PictureSequence::Layer<T>, or the type named by
* NVVL_PicLayer::type in the C interface).
*/
bool normalized;
/**
* Color space of output
*
* \see NVVL_ColorSpace
*/
enum NVVL_ColorSpace color_space;
/**
* Method to upscale the chroma channel in 4:2:0 to 4:4:4 conversion.
*
* \see NVVL_ChromaUpMethod
*/
enum NVVL_ChromaUpMethod chroma_up_method;
/**
* Method used to scale frame from original size to scale_width x
* scale_height.
*
* \see NVVL_ScaleMethod
*/
enum NVVL_ScaleMethod scale_method;
/**
* Strides for the various dimensions.
*
* These are element strides, not byte strides. So, for
* example, if T is float, a stride of "10" is a stride of
* 10*sizeof(float) bytes.
*
* NOTE(review): presumably x is the stride between horizontally
* adjacent pixels, y between rows, c between channels and n between
* frames - confirm against the implementation.
*/
struct {
size_t x;
size_t y;
size_t c;
size_t n;
} stride;
};
/**
* Dynamically typed C version of NVVL::PictureSequence::Layer.
*
* Where the C++ layer is a template over its element type, this struct
* carries the element type at run time in \c type and exposes the data
* as an untyped pointer.
*/
struct NVVL_PicLayer {
/**
* Type of the elements pointed to by \c data
*
* \see NVVL_PicDataType
*/
enum NVVL_PicDataType type;
/**
* Elements of the description shared with the C++ NVVL::PictureSequence::Layer
*
* \see NVVL_LayerDesc
*/
struct NVVL_LayerDesc desc;
/**
* Equivalent to NVVL::PictureSequence::Layer::index_map
*
* If NULL, use a 1-to-1 mapping. Otherwise must point to an
* array of length index_map_length.
*
* \see NVVL::PictureSequence::Layer::index_map
*/
const int* index_map;
/**
* Number of ints in \c index_map
*
* Only meaningful when \c index_map is not NULL.
*/
int index_map_length;
/**
* The actual data of the layer
*
* The type of data pointed to by this is determined by \c type
*/
void* data;
};
/**
* Opaque handle to a PictureSequence.
*
* Returned by nvvl_create_sequence() / nvvl_create_sequence_device() and
* passed as the first argument to the other nvvl_* sequence functions.
* Callers should not dereference it.
*/
typedef void* PictureSequenceHandle;
/**
* Different types of metadata that can be retrieved.
*
* Passed to nvvl_get_or_add_meta_array() and nvvl_get_meta_array() to
* say what element type the requested metadata array holds.
*/
enum NVVL_PicMetaType {
// Metadata array of ints.
PMT_INT,
// Metadata array of strings; individual entries are read with
// nvvl_get_meta_str().
PMT_STRING
};
/**
* Wrapper for PictureSequence::PictureSequence(uint16_t)
*
* Creates the sequence on the current device.
*
* \param count number of frames to receive from the decoder
*
* \return handle to the new sequence; release it with
* nvvl_free_sequence()
*/
PictureSequenceHandle nvvl_create_sequence(uint16_t count);
/**
* Wrapper for PictureSequence::PictureSequence(uint16_t, int)
*
* Creates the sequence on the given device.
*
* \param count number of frames to receive from the decoder
* \param device_id device to associate with the sequence
*
* \return handle to the new sequence; release it with
* nvvl_free_sequence()
*/
PictureSequenceHandle nvvl_create_sequence_device(uint16_t count, int device_id);
/**
* Wrapper for PictureSequence::set_layer()
*
* All options and index_map are copied into the sequence.
*
* NOTE(review): presumably, as documented for the C++
* PictureSequence::set_layer(), the caller keeps ownership of
* layer->data and must keep it valid until the data has been
* retrieved - confirm against the implementation.
*
* \param sequence handle to a valid PictureSequence
* \param layer description of the layer to add
* \param name name the layer should be given
*/
void nvvl_set_layer(PictureSequenceHandle sequence,
const struct NVVL_PicLayer* layer,
const char* name);
/**
* Wrapper for PictureSequence::get_or_add_meta()
*
* \param sequence handle to a valid PictureSequence
* \param type element type of the metadata array
* \param name name of the metadata array
*
* \return a pointer to an array of type `type` containing the
* metadata `name`, return NULL if the named metadata does not
* exist. Does not (currently) support strings.
*
* NOTE(review): "NULL if the named metadata does not exist" is at odds
* with get-or-add semantics, which would create the missing array. The
* NULL case may actually apply to unsupported types such as
* PMT_STRING - confirm against the implementation.
*/
void* nvvl_get_or_add_meta_array(PictureSequenceHandle sequence, enum NVVL_PicMetaType type, const char* name);
/**
* Wrapper for PictureSequence::get_meta()
*
* Read-only counterpart of nvvl_get_or_add_meta_array().
*
* \param sequence handle to a valid PictureSequence
* \param type element type of the metadata array
* \param name name of the metadata array to get
*
* \return pointer to the metadata array. NOTE(review): behavior when
* `name` does not exist is not documented here - confirm.
*/
const void* nvvl_get_meta_array(PictureSequenceHandle sequence, enum NVVL_PicMetaType type, const char* name);
/**
* Get the string of metadata `name` corresponding to `index`
*
* \param sequence handle to a valid PictureSequence
* \param name name of the metadata to retrieve
* \param index which frame index to get
*
* \return the requested string. NOTE(review): presumably owned by the
* sequence and only valid while it exists - confirm.
*/
const char* nvvl_get_meta_str(PictureSequenceHandle sequence, const char* name, int index);
/**
* Wrapper for PictureSequence::count()
*
* \param sequence handle to a valid PictureSequence
*
* \return the number of frames retrieved from the decoder for this
* sequence, which is not necessarily the count of any of its layers
*/
int nvvl_get_sequence_count(PictureSequenceHandle sequence);
/**
* Wrapper for PictureSequence::get_layer(name)
*
* Note that the PictureSequence retains ownership of the index_map,
* so the returned index_map is only valid while the PictureSequence
* exists.
*
* \param sequence handle to a valid PictureSequence
* \param type element type the layer was stored with
* \param name name of the layer
*
* \return the layer, by value
*/
struct NVVL_PicLayer nvvl_get_layer(PictureSequenceHandle sequence,
enum NVVL_PicDataType type,
const char* name);
/**
* Wrapper for PictureSequence::get_layer(name, index)
*
* Since this is a copy of the layer description, index_map is left
* NULL to avoid returning a pointer to dynamically allocated memory.
*
* \param sequence handle to a valid PictureSequence
* \param type element type the layer was stored with
* \param name name of the layer
* \param index index to adjust the data pointer to
*
* \return copy of the layer with a NULL index_map
*/
struct NVVL_PicLayer nvvl_get_layer_indexed(PictureSequenceHandle sequence,
enum NVVL_PicDataType type,
const char* name,
int index);
/**
* Wrapper for PictureSequence::wait()
*
* Synchronously waits for the sequence to be ready to use.
*
* \param sequence handle to a valid PictureSequence
*/
void nvvl_sequence_wait(PictureSequenceHandle sequence);
/**
* Wrapper for PictureSequence::wait(cudaStream_t)
*
* Synchronously waits until the transfer from the decoder has begun,
* then inserts a wait event into \c stream.
*
* \param sequence handle to a valid PictureSequence
* \param stream the CUDA stream to insert the wait event into
*/
void nvvl_sequence_stream_wait(PictureSequenceHandle sequence, cudaStream_t stream);
/**
* Free a PictureSequence
*
* \param sequence handle previously returned by nvvl_create_sequence()
* or nvvl_create_sequence_device(); it must not be used afterwards
*/
void nvvl_free_sequence(PictureSequenceHandle sequence);
#ifdef __cplusplus
} // end extern "C"
#include <cuda.h>

#include <memory>
#include <string>
#include <utility>
#include <vector>
namespace NVVL {
namespace detail {
// Forward declaration only: PictureSequence names Decoder as a friend
// without needing its definition in this public header.
class Decoder;
}
// C++ scoping for these C structs: lets C++ callers write
// NVVL::ScaleMethod, NVVL::LayerDesc, ... instead of the NVVL_-prefixed
// C names. These are aliases, not new types.
using ScaleMethod = NVVL_ScaleMethod;
using ChromaUpMethod = NVVL_ChromaUpMethod;
using ColorSpace = NVVL_ColorSpace;
using PicLayer = NVVL_PicLayer;
using LayerDesc = NVVL_LayerDesc;
class PictureSequence {
public:
    /**
     * Create an empty PictureSequence on the current device.
     *
     * \param count The number of frames to receive from the
     * decoder. Not all of the frames received from the decoder need
     * to be used in any layer, so some layers may have a different
     * count.
     *
     * NOTE(review): this constructor is not `explicit`, so a uint16_t
     * converts implicitly to a PictureSequence. Left unchanged to
     * stay source compatible with existing callers.
     */
    PictureSequence(uint16_t count);

    /**
     * Create an empty PictureSequence on the given device.
     *
     * \param count The number of frames to receive from the
     * decoder. Not all of the frames received from the decoder need
     * to be used in any layer, so some layers may have a different
     * count.
     *
     * \param device_id device associated with the sequence. This
     * must be the same device that the data in the layers is
     * allocated on and that the VideoLoader is associated with.
     */
    PictureSequence(uint16_t count, int device_id);

    /**
     * A full description of a layer.
     *
     * \tparam T element type of the tensor pointed to by \c data
     */
    template<typename T>
    struct Layer {
        /**
         * Elements of description shared with the C version PicLayer.
         *
         * \see NVVL_LayerDesc
         */
        LayerDesc desc;

        /**
         * Map from indices into the decoded sequence to indices in this Layer.
         *
         * An empty vector indicates a 1-to-1 mapping from sequence to layer.
         *
         * For example, to reverse the frames of a 5-frame sequence,
         * set index_map to {4, 3, 2, 1, 0}.
         *
         * An index of -1 indicates that the decoded frame should not
         * be used in this layer. For example, to extract just the
         * middle frame from a sequence of 5 frames, set index_map to
         * {-1, -1, 0, -1, -1}.
         *
         * If the size of index_map is less than the number of frames
         * in the sequence, then those extra frames will not be
         * used. For example, if index_map is {-1, 0} and the sequence
         * is 5 frames, only the second frame in the sequence will be
         * placed into the output array (at index 0).
         *
         * It is up to the user to ensure that all indices are smaller
         * than the size of this layer.
         */
        std::vector<int> index_map;

        /**
         * Pointer to the multi-dimensional tensor to place the frames into.
         *
         * The smallest dimension should be padded for optimal
         * performance, see the CUDA documentation for cudaMallocPitch
         * for details.
         */
        T* data;
    };

    /**
     * Add a layer to this sequence
     *
     * All the options and the index_map are copied in, but the caller
     * maintains ownership of the data, which it should keep valid until
     * the data has been retrieved from the picture sequence.
     *
     * \param name name the layer should be given
     * \param layer description of the layer
     */
    template<typename T>
    void set_layer(std::string name, const Layer<T>& layer);

    /**
     * Overload for set_layer that takes a C-style PicLayer
     *
     * Builds a Layer<T> from the C description and forwards it to the
     * Layer<T> overload. \c layer->type is not consulted here: the
     * caller picks T, and \c layer->data is interpreted as a T*.
     *
     * \param name name the layer should be given
     * \param layer C description of the layer; must not be NULL
     */
    template<typename T>
    void set_layer(std::string name, const PicLayer* layer) {
        Layer<T> converted{};
        // PicLayer::data is a void*, for which static_cast is the
        // appropriate named cast.
        converted.data = static_cast<T*>(layer->data);
        converted.desc = layer->desc;
        // A NULL map means "1-to-1 mapping", which is what an empty
        // vector encodes. A non-positive length is treated the same way:
        // a negative length would otherwise build an iterator range with
        // last < first, which is undefined behaviour.
        if (layer->index_map != nullptr && layer->index_map_length > 0) {
            converted.index_map.assign(
                layer->index_map,
                layer->index_map + layer->index_map_length);
        }
        // name was taken by value, so hand it on without another copy.
        set_layer(std::move(name), converted);
    }

    /**
     * Retrieve a layer from the sequence with data pointing to specific index
     *
     * \param name name of the layer
     * \param index index to adjust the data pointer to
     *
     * \return Copy of the layer description
     */
    template<typename T>
    Layer<T> get_layer(std::string name, int index) const;

    /**
     * Retrieve a reference to a layer from the sequence
     *
     * \param name name of the layer
     *
     * \return const reference to the layer
     */
    template<typename T>
    const Layer<T>& get_layer(std::string name) const;

    /**
     * Check if sequence has the named layer
     *
     * \param name name of data layer to look for
     *
     * \return True if layer \c name exists in this sequence
     */
    bool has_layer(std::string name) const;

    /**
     * Get the vector for the named meta, adding it if it does not exist
     *
     * \param name name of meta array to get or add
     *
     * \return non-const reference to meta vector for \c name
     */
    template<typename T>
    std::vector<T>& get_or_add_meta(std::string name);

    /**
     * Get a const reference to the meta array for \c name
     *
     * \param name name of meta array to get
     *
     * \return const reference to meta array
     */
    template<typename T>
    const std::vector<T>& get_meta(std::string name) const;

    /**
     * Check if sequence has the named meta array
     *
     * \param name name of meta array to look for
     *
     * \return True if meta array \c name exists in the sequence
     */
    bool has_meta(std::string name) const;

    /**
     * The number of frames retrieved from the decoder for this sequence
     *
     * Returned count is not necessarily the count for any of the layers
     *
     * \return the number of frames retrieved
     */
    int count() const;

    /**
     * Set the number of frames to retrieve from the decoder
     *
     * \param count the number of frames to retrieve
     */
    void set_count(int count);

    /**
     * Synchronously wait for the sequence to be ready to use
     */
    void wait() const;

    /**
     * Synchronously wait until ready, then insert a wait event into
     * stream.
     *
     * Waits until the transfer from the decoder in the data layer has
     * begun, then inserts a wait event into \c stream signalling the
     * completion of the transfer.
     *
     * Until the transfer from the decoder has begun, the event to
     * wait on has not captured any work so we don't have anything to
     * wait on.
     *
     * \param stream The CUDA stream to insert the wait event into.
     */
    void wait(cudaStream_t stream) const;

    // The special members below are declared here and defined out of
    // line so that std::unique_ptr<impl> is only destroyed/moved where
    // impl is a complete type. The class is move-only.
    //
    // NOTE(review): the move operations are not declared noexcept;
    // adding it would also require changing their out-of-line
    // definitions, which are not visible from this header.
    ~PictureSequence();
    PictureSequence(PictureSequence&&);
    PictureSequence& operator=(PictureSequence&&);
    PictureSequence(const PictureSequence&) = delete;
    PictureSequence& operator=(const PictureSequence&) = delete;

private:
    // we use pImpl here to prevent copying a slew of headers for installation
    // (i.e. a chunk of boost for boost::variant)
    class impl;
    std::unique_ptr<impl> pImpl;

    // Decoder needs to record the event and indicate transfer has started
    friend class detail::Decoder;
};
}
#endif // ifdef __cplusplus