58 #define OFFSET(x) offsetof(DnnProcessingContext, x) 59 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM | AV_OPT_FLAG_VIDEO_PARAM 63 #if (CONFIG_LIBTENSORFLOW == 1) 123 #define LOG_FORMAT_CHANNEL_MISMATCH() \ 124 av_log(ctx, AV_LOG_ERROR, \ 125 "the frame's format %s does not match " \ 126 "the model input channel %d\n", \ 127 av_get_pix_fmt_name(fmt), \ 128 model_input->channels); 136 if (model_input->
height != -1 && model_input->
height != inlink->
h) {
138 model_input->
height, inlink->
h);
141 if (model_input->
width != -1 && model_input->
width != inlink->
w) {
143 model_input->
width, inlink->
w);
155 av_log(ctx,
AV_LOG_ERROR,
"only support dnn models with input data type as float32 and uint8.\n");
280 if (inlink->
w != outlink->
w || inlink->
h != outlink->
h) {
332 (
const int [4]){frame->width * 3 * sizeof(float), 0, 0, 0});
337 bytewidth, frame->
height);
344 bytewidth, frame->
height);
353 (
const int [4]){frame->width * sizeof(float), 0, 0, 0});
372 (
const int[4]){frame->width * 3 * sizeof(float), 0, 0, 0},
378 dnn_output->
data, bytewidth,
379 bytewidth, frame->
height);
387 dnn_output->
data, bytewidth,
388 bytewidth, frame->
height);
393 dnn_output->
data, bytewidth,
394 bytewidth, frame->
height);
402 (
const int[4]){frame->width * sizeof(float), 0, 0, 0},
428 for (
int i = 1;
i < 3; ++
i) {
432 bytewidth, uv_height);
511 .
name =
"dnn_processing",
517 .
inputs = dnn_processing_inputs,
518 .
outputs = dnn_processing_outputs,
519 .priv_class = &dnn_processing_class,
int av_image_get_linesize(enum AVPixelFormat pix_fmt, int width, int plane)
Compute the size of an image line with format pix_fmt and width width for the plane plane...
static enum AVPixelFormat pix_fmt
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
This structure describes decoded (raw) audio or video data.
static const AVOption dnn_processing_options[]
void(* free_model)(DNNModel **model)
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Main libavfilter public API header.
packed RGB 8:8:8, 24bpp, RGBRGB...
int h
agreed upon image height
struct SwsContext * sws_uv_scale
static const AVFilterPad dnn_processing_outputs[]
struct SwsContext * sws_grayf32_to_gray8
AVFrame * ff_get_video_buffer(AVFilterLink *link, int w, int h)
Request a picture buffer with a specific set of permissions.
static av_cold void uninit(AVFilterContext *ctx)
uint8_t log2_chroma_w
Amount to shift the luma width right to find the chroma width.
const char * name
Pad name.
AVFilterLink ** inputs
array of pointers to input links
#define av_assert0(cond)
assert() equivalent, that is always enabled.
DNNModel *(* load_model)(const char *model_filename)
AVFilter ff_vf_dnn_processing
int ff_filter_frame(AVFilterLink *link, AVFrame *frame)
Send a frame of data to the next filter.
static int check_modelinput_inlink(const DNNData *model_input, const AVFilterLink *inlink)
struct SwsContext * sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, int dstW, int dstH, enum AVPixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
Allocate and return an SwsContext.
static int config_input(AVFilterLink *inlink)
A filter pad used for either input or output.
A link between two filters.
static int copy_from_frame_to_dnn(DnnProcessingContext *ctx, const AVFrame *frame)
DNNReturnType(* get_input)(void *model, DNNData *input, const char *input_name)
#define i(width, name, range_min, range_max)
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
uint8_t log2_chroma_h
Amount to shift the luma height right to find the chroma height.
static av_cold int init(AVFilterContext *context)
#define AV_PIX_FMT_FLAG_RGB
The pixel format contains RGB-like data (as opposed to YUV/grayscale).
void av_frame_free(AVFrame **frame)
Free the frame and any dynamically allocated objects in it, e.g. extended_data.
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification. ...
void * priv
private data for use by the filter
simple assert() macros that are a bit more flexible than ISO C assert().
planar YUV 4:2:2, 16bpp, (1 Cr & Cb sample per 2x1 Y samples)
int w
agreed upon image width
uint64_t flags
Combination of AV_PIX_FMT_FLAG_...
uint8_t nb_components
The number of components each pixel has (1-4).
static int copy_uv_planes(DnnProcessingContext *ctx, AVFrame *out, const AVFrame *in)
void sws_freeContext(struct SwsContext *swsContext)
Free the swscaler context swsContext.
packed RGB 8:8:8, 24bpp, BGRBGR...
AVFilterContext * src
source filter
static const AVFilterPad dnn_processing_inputs[]
static int prepare_sws_context(AVFilterLink *outlink)
AVFILTER_DEFINE_CLASS(dnn_processing)
DNN inference engine interface.
static const AVFilterPad inputs[]
static const AVFilterPad outputs[]
int format
agreed upon media format
int format
format of the frame, -1 if unknown or unset. Values correspond to enum AVPixelFormat for video frames...
#define AV_PIX_FMT_GRAYF32
int linesize[AV_NUM_DATA_POINTERS]
For video, size in bytes of each picture line.
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
int attribute_align_arg sws_scale(struct SwsContext *c, const uint8_t *const srcSlice[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])
swscale wrapper, so we don't need to export the SwsContext.
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> in
planar YUV 4:1:0, 9bpp, (1 Cr & Cb sample per 4x4 Y samples)
Describe the class of an AVClass context structure.
static int query_formats(AVFilterContext *context)
static int copy_from_dnn_to_frame(DnnProcessingContext *ctx, AVFrame *frame)
static av_always_inline int isPlanarYUV(enum AVPixelFormat pix_fmt)
const char * name
Filter name.
AVFilterLink ** outputs
array of pointers to output links
static enum AVPixelFormat pix_fmts[]
DNNModule * ff_get_dnn_module(DNNBackendType backend_type)
DNNReturnType(* execute_model)(const DNNModel *model, DNNData *outputs, uint32_t nb_output)
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
planar YUV 4:1:1, 12bpp, (1 Cr & Cb sample per 4x1 Y samples)
AVFilterContext * dst
dest filter
#define LOG_FORMAT_CHANNEL_MISMATCH()
struct SwsContext * sws_gray8_to_grayf32
DNNReturnType(* set_input_output)(void *model, DNNData *input, const char *input_name, const char **output_names, uint32_t nb_output)
DNNBackendType backend_type
void av_image_copy_plane(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int bytewidth, int height)
Copy image plane from src to dst.
const char * av_get_pix_fmt_name(enum AVPixelFormat pix_fmt)
Return the short name for a pixel format, NULL in case pix_fmt is unknown.
static int config_output(AVFilterLink *outlink)
AVPixelFormat
Pixel format.
int av_frame_copy_props(AVFrame *dst, const AVFrame *src)
Copy only "metadata" fields from src to dst.
#define AV_CEIL_RSHIFT(a, b)
#define check(x, y, S, v)