diff options
author | FlyingRat <flyingrat@outlook.com> | 2013-04-07 16:36:04 +0200 |
---|---|---|
committer | FlyingRat <flyingrat@outlook.com> | 2013-04-07 16:36:04 +0200 |
commit | 0e63a815aa6af63a21848e04b683d3f506dd41b1 (patch) | |
tree | 002f61d8a5b1d294d99fd4ba5b6982d76a612f0c /lib/ffmpeg/libpostproc | |
parent | 71862137c5337fc678681a23bfbc65f4db7a7b2f (diff) |
[FFmpeg] version bump to n1.2 (rev e820e3a) - lib/ffmpeg
This commit now contains the original patches subdirectory:
patches - Original directory that contains the applied xbmc custom patches.
patches/README-patches - New README file with information about the xbmc patches.
patches/obsolete-patches - New directory with obsolete xbmc patches.
Diffstat (limited to 'lib/ffmpeg/libpostproc')
-rw-r--r-- | lib/ffmpeg/libpostproc/Makefile | 3 | ||||
-rw-r--r-- | lib/ffmpeg/libpostproc/postprocess.c | 229 | ||||
-rw-r--r-- | lib/ffmpeg/libpostproc/postprocess.h | 30 | ||||
-rw-r--r-- | lib/ffmpeg/libpostproc/postprocess_altivec_template.c | 2 | ||||
-rw-r--r-- | lib/ffmpeg/libpostproc/postprocess_internal.h | 24 | ||||
-rw-r--r-- | lib/ffmpeg/libpostproc/postprocess_template.c | 293 | ||||
-rw-r--r-- | lib/ffmpeg/libpostproc/version.h | 45 |
7 files changed, 358 insertions, 268 deletions
diff --git a/lib/ffmpeg/libpostproc/Makefile b/lib/ffmpeg/libpostproc/Makefile index 6242157e71..3fb5a70806 100644 --- a/lib/ffmpeg/libpostproc/Makefile +++ b/lib/ffmpeg/libpostproc/Makefile @@ -3,6 +3,7 @@ include $(SUBDIR)../config.mak NAME = postproc FFLIBS = avutil -HEADERS = postprocess.h +HEADERS = postprocess.h \ + version.h \ OBJS = postprocess.o diff --git a/lib/ffmpeg/libpostproc/postprocess.c b/lib/ffmpeg/libpostproc/postprocess.c index ed5c240012..facfd2cdd9 100644 --- a/lib/ffmpeg/libpostproc/postprocess.c +++ b/lib/ffmpeg/libpostproc/postprocess.c @@ -80,9 +80,9 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks #include <stdio.h> #include <stdlib.h> #include <string.h> -//#undef HAVE_MMX2 -//#define HAVE_AMD3DNOW -//#undef HAVE_MMX +//#undef HAVE_MMXEXT_INLINE +//#define HAVE_AMD3DNOW_INLINE +//#undef HAVE_MMX_INLINE //#undef ARCH_X86 //#define DEBUG_BRIGHTNESS #include "postprocess.h" @@ -116,7 +116,7 @@ const char *postproc_license(void) #define TEMP_STRIDE 8 //#define NUM_BLOCKS_AT_ONCE 16 //not used yet -#if ARCH_X86 +#if ARCH_X86 && HAVE_INLINE_ASM DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL; DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL; DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL; @@ -130,7 +130,7 @@ DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL; DECLARE_ASM_CONST(8, int, deringThreshold)= 20; -static struct PPFilter filters[]= +static const struct PPFilter filters[]= { {"hb", "hdeblock", 1, 1, 3, H_DEBLOCK}, {"vb", "vdeblock", 1, 2, 4, V_DEBLOCK}, @@ -150,6 +150,7 @@ static struct PPFilter filters[]= {"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER}, {"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER}, {"fq", "forcequant", 1, 0, 0, FORCE_QUANT}, + {"be", "bitexact", 1, 0, 0, BITEXACT}, {NULL, NULL,0,0,0,0} //End Marker }; @@ -164,7 +165,7 @@ static const char *replaceTable[]= }; -#if ARCH_X86 +#if ARCH_X86 && HAVE_INLINE_ASM static inline void prefetchnta(void *p) { 
__asm__ volatile( "prefetchnta (%0)\n\t" @@ -200,7 +201,7 @@ static inline void prefetcht2(void *p) /** * Check if the given 8x8 Block is mostly "flat" */ -static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) +static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c) { int numEq= 0; int y; @@ -223,7 +224,7 @@ static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c) /** * Check if the middle 8x8 Block in the given 8x16 block is flat */ -static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c) +static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c) { int numEq= 0; int y; @@ -245,7 +246,7 @@ static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c) return numEq > c->ppMode.flatnessThreshold; } -static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP) +static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP) { int i; for(i=0; i<2; i++){ @@ -261,7 +262,7 @@ static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP) return 1; } -static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) +static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP) { int x; src+= stride*4; @@ -274,7 +275,7 @@ static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP) return 1; } -static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c) +static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c) { if( isHorizDC_C(src, stride, c) ){ if( isHorizMinMaxOk_C(src, stride, c->QP) ) @@ -286,7 +287,7 @@ static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c) } } -static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c) +static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c) { if( isVertDC_C(src, stride, c) ){ if( isVertMinMaxOk_C(src, stride, c->QP) ) @@ -298,7 +299,7 @@ static inline int 
vertClassify_C(uint8_t src[], int stride, PPContext *c) } } -static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) +static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c) { int y; for(y=0; y<BLOCK_SIZE; y++){ @@ -337,7 +338,7 @@ static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c) * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block) * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version) */ -static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) +static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c) { int y; for(y=0; y<BLOCK_SIZE; y++){ @@ -380,11 +381,10 @@ static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c) static inline void horizX1Filter(uint8_t *src, int stride, int QP) { int y; - static uint64_t *lut= NULL; - if(lut==NULL) + static uint64_t lut[256]; + if(!lut[255]) { int i; - lut = av_malloc(256*8); for(i=0; i<256; i++) { int v= i < 128 ? 
2*i : 2*(i-256); @@ -435,7 +435,9 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP) /** * accurate deblock filter */ -static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){ +static av_always_inline void do_a_deblock_C(uint8_t *src, int step, + int stride, const PPContext *c) +{ int y; const int QP= c->QP; const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1; @@ -536,141 +538,86 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one //Plain C versions -#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT -#define COMPILE_C -#endif - -#if HAVE_ALTIVEC -#define COMPILE_ALTIVEC -#endif //HAVE_ALTIVEC - -#if ARCH_X86 - -#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT -#define COMPILE_MMX -#endif - -#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT -#define COMPILE_MMX2 -#endif - -#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT -#define COMPILE_3DNOW -#endif -#endif /* ARCH_X86 */ - -#undef HAVE_MMX -#define HAVE_MMX 0 -#undef HAVE_MMX2 -#define HAVE_MMX2 0 -#undef HAVE_AMD3DNOW -#define HAVE_AMD3DNOW 0 -#undef HAVE_ALTIVEC -#define HAVE_ALTIVEC 0 - -#ifdef COMPILE_C -#define RENAME(a) a ## _C -#include "postprocess_template.c" -#endif - -#ifdef COMPILE_ALTIVEC -#undef RENAME -#undef HAVE_ALTIVEC -#define HAVE_ALTIVEC 1 -#define RENAME(a) a ## _altivec -#include "postprocess_altivec_template.c" +//we always compile C for testing which needs bitexactness +#define TEMPLATE_PP_C 1 #include "postprocess_template.c" -#endif - -//MMX versions -#ifdef COMPILE_MMX -#undef RENAME -#undef HAVE_MMX -#define HAVE_MMX 1 -#define RENAME(a) a ## _MMX -#include "postprocess_template.c" -#endif -//MMX2 versions -#ifdef COMPILE_MMX2 -#undef RENAME -#undef HAVE_MMX -#undef HAVE_MMX2 -#define HAVE_MMX 1 -#define HAVE_MMX2 1 -#define RENAME(a) a ## _MMX2 -#include 
"postprocess_template.c" +#if HAVE_ALTIVEC +# define TEMPLATE_PP_ALTIVEC 1 +# include "postprocess_altivec_template.c" +# include "postprocess_template.c" #endif -//3DNOW versions -#ifdef COMPILE_3DNOW -#undef RENAME -#undef HAVE_MMX -#undef HAVE_MMX2 -#undef HAVE_AMD3DNOW -#define HAVE_MMX 1 -#define HAVE_MMX2 0 -#define HAVE_AMD3DNOW 1 -#define RENAME(a) a ## _3DNow -#include "postprocess_template.c" +#if ARCH_X86 && HAVE_INLINE_ASM +# if CONFIG_RUNTIME_CPUDETECT +# define TEMPLATE_PP_MMX 1 +# include "postprocess_template.c" +# define TEMPLATE_PP_MMXEXT 1 +# include "postprocess_template.c" +# define TEMPLATE_PP_3DNOW 1 +# include "postprocess_template.c" +# define TEMPLATE_PP_SSE2 1 +# include "postprocess_template.c" +# else +# if HAVE_SSE2_INLINE +# define TEMPLATE_PP_SSE2 1 +# include "postprocess_template.c" +# elif HAVE_MMXEXT_INLINE +# define TEMPLATE_PP_MMXEXT 1 +# include "postprocess_template.c" +# elif HAVE_AMD3DNOW_INLINE +# define TEMPLATE_PP_3DNOW 1 +# include "postprocess_template.c" +# elif HAVE_MMX_INLINE +# define TEMPLATE_PP_MMX 1 +# include "postprocess_template.c" +# endif +# endif #endif -// minor note: the HAVE_xyz is messed up after that line so do not use it. 
+typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, + const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2); static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc) { + pp_fn pp = postProcess_C; PPContext *c= (PPContext *)vc; PPMode *ppMode= (PPMode *)vm; c->ppMode= *ppMode; //FIXME - // Using ifs here as they are faster than function pointers although the - // difference would not be measurable here but it is much better because - // someone might exchange the CPU whithout restarting MPlayer ;) + if (!(ppMode->lumMode & BITEXACT)) { #if CONFIG_RUNTIME_CPUDETECT -#if ARCH_X86 - // ordered per speed fastest first - if(c->cpuCaps & PP_CPU_CAPS_MMX2) - postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); - else if(c->cpuCaps & PP_CPU_CAPS_3DNOW) - postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); - else if(c->cpuCaps & PP_CPU_CAPS_MMX) - postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); - else - postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); -#else -#if HAVE_ALTIVEC - if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC) - postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); - else -#endif - postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); +#if ARCH_X86 && HAVE_INLINE_ASM + // ordered per speed fastest first + if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2; + else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2; + else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow; + else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX; +#elif HAVE_ALTIVEC + if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = 
postProcess_altivec; #endif #else /* CONFIG_RUNTIME_CPUDETECT */ -#if HAVE_MMX2 - postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); -#elif HAVE_AMD3DNOW - postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); -#elif HAVE_MMX - postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); +#if HAVE_SSE2_INLINE + pp = postProcess_SSE2; +#elif HAVE_MMXEXT_INLINE + pp = postProcess_MMX2; +#elif HAVE_AMD3DNOW_INLINE + pp = postProcess_3DNow; +#elif HAVE_MMX_INLINE + pp = postProcess_MMX; #elif HAVE_ALTIVEC - postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); -#else - postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); + pp = postProcess_altivec; #endif #endif /* !CONFIG_RUNTIME_CPUDETECT */ -} + } -//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, -// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode); + pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c); +} /* -pp Command line Help */ -#if LIBPOSTPROC_VERSION_INT < (52<<16) -const char *const pp_help= -#else const char pp_help[] = -#endif "Available postprocessing filters:\n" "Filters Options\n" "short long name short long option Description\n" @@ -723,6 +670,20 @@ pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality) struct PPMode *ppMode; char *filterToken; + if (!name) { + av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n"); + return NULL; + } + + if (!strcmp(name, "help")) { + const char *p; + for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) { + av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2)); + av_log(NULL, AV_LOG_INFO, "%s", temp); + } + return NULL; + } + ppMode= av_malloc(sizeof(PPMode)); ppMode->lumMode= 0; @@ -906,7 +867,7 @@ static void reallocBuffers(PPContext *c, int 
width, int height, int stride, int c->stride= stride; c->qpStride= qpStride; - reallocAlign((void **)&c->tempDst, 8, stride*24); + reallocAlign((void **)&c->tempDst, 8, stride*24+32); reallocAlign((void **)&c->tempSrc, 8, stride*24); reallocAlign((void **)&c->tempBlocks, 8, 2*16*8); reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t)); @@ -938,7 +899,6 @@ pp_context *pp_get_context(int width, int height, int cpuCaps){ memset(c, 0, sizeof(PPContext)); c->av_class = &av_codec_context_class; - c->cpuCaps= cpuCaps; if(cpuCaps&PP_FORMAT){ c->hChromaSubSample= cpuCaps&0x3; c->vChromaSubSample= (cpuCaps>>4)&0x3; @@ -946,6 +906,15 @@ pp_context *pp_get_context(int width, int height, int cpuCaps){ c->hChromaSubSample= 1; c->vChromaSubSample= 1; } + if (cpuCaps & PP_CPU_CAPS_AUTO) { + c->cpuCaps = av_get_cpu_flags(); + } else { + c->cpuCaps = 0; + if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX; + if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT; + if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW; + if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC; + } reallocBuffers(c, width, height, stride, qpStride); diff --git a/lib/ffmpeg/libpostproc/postprocess.h b/lib/ffmpeg/libpostproc/postprocess.h index c2c5c73240..928e01fe10 100644 --- a/lib/ffmpeg/libpostproc/postprocess.h +++ b/lib/ffmpeg/libpostproc/postprocess.h @@ -23,27 +23,16 @@ /** * @file - * @brief - * external postprocessing API + * @ingroup lpp + * external API header */ -#include "libavutil/avutil.h" - -#ifndef LIBPOSTPROC_VERSION_MAJOR -#define LIBPOSTPROC_VERSION_MAJOR 52 -#define LIBPOSTPROC_VERSION_MINOR 0 -#define LIBPOSTPROC_VERSION_MICRO 100 -#endif - -#define LIBPOSTPROC_VERSION_INT AV_VERSION_INT(LIBPOSTPROC_VERSION_MAJOR, \ - LIBPOSTPROC_VERSION_MINOR, \ - LIBPOSTPROC_VERSION_MICRO) -#define LIBPOSTPROC_VERSION AV_VERSION(LIBPOSTPROC_VERSION_MAJOR, \ - LIBPOSTPROC_VERSION_MINOR, \ - LIBPOSTPROC_VERSION_MICRO) -#define 
LIBPOSTPROC_BUILD LIBPOSTPROC_VERSION_INT +/** + * @defgroup lpp Libpostproc + * @{ + */ -#define LIBPOSTPROC_IDENT "postproc" AV_STRINGIFY(LIBPOSTPROC_VERSION) +#include "libpostproc/version.h" /** * Return the LIBPOSTPROC_VERSION_INT constant. @@ -100,6 +89,7 @@ void pp_free_context(pp_context *ppContext); #define PP_CPU_CAPS_MMX2 0x20000000 #define PP_CPU_CAPS_3DNOW 0x40000000 #define PP_CPU_CAPS_ALTIVEC 0x10000000 +#define PP_CPU_CAPS_AUTO 0x00080000 #define PP_FORMAT 0x00000008 #define PP_FORMAT_420 (0x00000011|PP_FORMAT) @@ -109,4 +99,8 @@ void pp_free_context(pp_context *ppContext); #define PP_PICT_TYPE_QP2 0x00000010 ///< MPEG2 style QScale +/** + * @} + */ + #endif /* POSTPROC_POSTPROCESS_H */ diff --git a/lib/ffmpeg/libpostproc/postprocess_altivec_template.c b/lib/ffmpeg/libpostproc/postprocess_altivec_template.c index 3a37562452..fa6ebe279d 100644 --- a/lib/ffmpeg/libpostproc/postprocess_altivec_template.c +++ b/lib/ffmpeg/libpostproc/postprocess_altivec_template.c @@ -825,7 +825,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) { #define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a) #define do_a_deblock_altivec(a...) do_a_deblock_C(a) -static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, +static inline void tempNoiseReducer_altivec(uint8_t *src, int stride, uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise) { const vector signed char neg1 = vec_splat_s8(-1); diff --git a/lib/ffmpeg/libpostproc/postprocess_internal.h b/lib/ffmpeg/libpostproc/postprocess_internal.h index 2256d78f78..b1b8f0c633 100644 --- a/lib/ffmpeg/libpostproc/postprocess_internal.h +++ b/lib/ffmpeg/libpostproc/postprocess_internal.h @@ -20,7 +20,7 @@ /** * @file - * internal api header. + * internal API header. 
*/ #ifndef POSTPROC_POSTPROCESS_INTERNAL_H @@ -28,6 +28,7 @@ #include <string.h> #include "libavutil/avutil.h" +#include "libavutil/intmath.h" #include "libavutil/log.h" #include "postprocess.h" @@ -53,7 +54,7 @@ #define H_X1_FILTER 0x2000 // 8192 #define H_A_DEBLOCK 0x4000 -/// select between full y range (255-0) or standart one (234-16) +/// select between full y range (255-0) or standard one (234-16) #define FULL_Y_RANGE 0x8000 // 32768 //Deinterlacing Filters @@ -67,6 +68,7 @@ #define TEMP_NOISE_FILTER 0x100000 #define FORCE_QUANT 0x200000 +#define BITEXACT 0x1000000 //use if you want a faster postprocessing code //cannot differentiate between chroma & luma filters (both on or both off) @@ -74,12 +76,10 @@ //filters on //#define COMPILE_TIME_MODE 0x77 -static inline int CLIP(int a){ - if(a&256) return ((a)>>31)^(-1); - else return a; -} +#define CLIP av_clip_uint8 + /** - * Postprocessng filter. + * Postprocessing filter. */ struct PPFilter{ const char *shortName; @@ -91,15 +91,15 @@ struct PPFilter{ }; /** - * Postprocessng mode. + * Postprocessing mode. 
*/ typedef struct PPMode{ - int lumMode; ///< acivates filters for luminance - int chromMode; ///< acivates filters for chrominance + int lumMode; ///< activates filters for luminance + int chromMode; ///< activates filters for chrominance int error; ///< non zero on error - int minAllowedY; ///< for brigtness correction - int maxAllowedY; ///< for brihtness correction + int minAllowedY; ///< for brightness correction + int maxAllowedY; ///< for brightness correction float maxClippedThreshold; ///< amount of "black" you are willing to lose to get a brightness-corrected picture int maxTmpNoise[3]; ///< for Temporal Noise Reducing filter (Maximal sum of abs differences) diff --git a/lib/ffmpeg/libpostproc/postprocess_template.c b/lib/ffmpeg/libpostproc/postprocess_template.c index 4b8184c4f4..ad0404f76b 100644 --- a/lib/ffmpeg/libpostproc/postprocess_template.c +++ b/lib/ffmpeg/libpostproc/postprocess_template.c @@ -23,39 +23,88 @@ * mmx/mmx2/3dnow postprocess code. */ -#include "libavutil/x86_cpu.h" +#include "libavutil/x86/asm.h" + +/* A single TEMPLATE_PP_* should be defined (to 1) when this template is + * included. The following macros will define its dependencies to 1 as well + * (like MMX2 depending on MMX), and will define to 0 all the others. Every + * TEMPLATE_PP_* need to be undef at the end. 
*/ + +#ifdef TEMPLATE_PP_C +# define RENAME(a) a ## _C +#else +# define TEMPLATE_PP_C 0 +#endif + +#ifdef TEMPLATE_PP_ALTIVEC +# define RENAME(a) a ## _altivec +#else +# define TEMPLATE_PP_ALTIVEC 0 +#endif + +#ifdef TEMPLATE_PP_MMX +# define RENAME(a) a ## _MMX +#else +# define TEMPLATE_PP_MMX 0 +#endif + +#ifdef TEMPLATE_PP_MMXEXT +# undef TEMPLATE_PP_MMX +# define TEMPLATE_PP_MMX 1 +# define RENAME(a) a ## _MMX2 +#else +# define TEMPLATE_PP_MMXEXT 0 +#endif + +#ifdef TEMPLATE_PP_3DNOW +# undef TEMPLATE_PP_MMX +# define TEMPLATE_PP_MMX 1 +# define RENAME(a) a ## _3DNow +#else +# define TEMPLATE_PP_3DNOW 0 +#endif + +#ifdef TEMPLATE_PP_SSE2 +# undef TEMPLATE_PP_MMX +# define TEMPLATE_PP_MMX 1 +# undef TEMPLATE_PP_MMXEXT +# define TEMPLATE_PP_MMXEXT 1 +# define RENAME(a) a ## _SSE2 +#else +# define TEMPLATE_PP_SSE2 0 +#endif #undef REAL_PAVGB #undef PAVGB #undef PMINUB #undef PMAXUB -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" -#elif HAVE_AMD3DNOW +#elif TEMPLATE_PP_3DNOW #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" #endif #define PAVGB(a,b) REAL_PAVGB(a,b) -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" -#elif HAVE_MMX +#elif TEMPLATE_PP_MMX #define PMINUB(b,a,t) \ "movq " #a ", " #t " \n\t"\ "psubusb " #b ", " #t " \n\t"\ "psubb " #t ", " #a " \n\t" #endif -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" -#elif HAVE_MMX +#elif TEMPLATE_PP_MMX #define PMAXUB(a,b) \ "psubusb " #a ", " #b " \n\t"\ "paddb " #a ", " #b " \n\t" #endif //FIXME? |255-0| = 1 (should not be a problem ...) 
-#if HAVE_MMX +#if TEMPLATE_PP_MMX /** * Check if the middle 8x8 Block in the given 8x16 block is flat */ @@ -79,7 +128,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ "movq %%mm0, %%mm4 \n\t" PMAXUB(%%mm1, %%mm4) PMINUB(%%mm1, %%mm3, %%mm5) - "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece + "psubb %%mm1, %%mm0 \n\t" // mm0 = difference "paddb %%mm7, %%mm0 \n\t" "pcmpgtb %%mm6, %%mm0 \n\t" @@ -135,7 +184,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ "psubusb %%mm3, %%mm4 \n\t" " \n\t" -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT "pxor %%mm7, %%mm7 \n\t" "psadbw %%mm7, %%mm0 \n\t" #else @@ -169,16 +218,16 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){ return 2; } } -#endif //HAVE_MMX +#endif //TEMPLATE_PP_MMX /** * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 */ -#if !HAVE_ALTIVEC +#if !TEMPLATE_PP_ALTIVEC static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) { -#if HAVE_MMX2 || HAVE_AMD3DNOW +#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW src+= stride*3; __asm__ volatile( //"movv %0 %1 %2\n\t" "movq %2, %%mm0 \n\t" // QP,..., QP @@ -305,7 +354,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) : "%"REG_a, "%"REG_c ); -#else //HAVE_MMX2 || HAVE_AMD3DNOW +#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW const int l1= stride; const int l2= stride + l1; const int l3= stride + l2; @@ -344,9 +393,9 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) src++; } -#endif //HAVE_MMX2 || HAVE_AMD3DNOW +#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW } -#endif //HAVE_ALTIVEC +#endif //TEMPLATE_PP_ALTIVEC /** * Experimental Filter 1 @@ -357,7 +406,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) */ static 
inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) { -#if HAVE_MMX2 || HAVE_AMD3DNOW +#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW src+= stride*3; __asm__ volatile( @@ -443,7 +492,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) : "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb) : "%"REG_a, "%"REG_c ); -#else //HAVE_MMX2 || HAVE_AMD3DNOW +#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW const int l1= stride; const int l2= stride + l1; @@ -477,13 +526,13 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) } src++; } -#endif //HAVE_MMX2 || HAVE_AMD3DNOW +#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW } -#if !HAVE_ALTIVEC +#if !TEMPLATE_PP_ALTIVEC static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c) { -#if HAVE_MMX2 || HAVE_AMD3DNOW +#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW /* uint8_t tmp[16]; const int l1= stride; @@ -764,7 +813,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext } } */ -#elif HAVE_MMX +#elif TEMPLATE_PP_MMX DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars src+= stride*4; __asm__ volatile( @@ -872,7 +921,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "movq (%3), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "movq 8(%3), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT "movq %%mm7, %%mm6 \n\t" // 0 "psubw %%mm0, %%mm6 \n\t" "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| @@ -904,7 +953,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| #endif -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT "pminsw %%mm2, %%mm0 \n\t" "pminsw %%mm3, %%mm1 \n\t" #else @@ -968,7 +1017,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext "pand %%mm2, %%mm4 \n\t" "pand %%mm3, %%mm5 \n\t" -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT 
"pminsw %%mm0, %%mm4 \n\t" "pminsw %%mm1, %%mm5 \n\t" #else @@ -995,7 +1044,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp) : "%"REG_a ); -#else //HAVE_MMX2 || HAVE_AMD3DNOW +#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW const int l1= stride; const int l2= stride + l1; const int l3= stride + l2; @@ -1033,14 +1082,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext } src++; } -#endif //HAVE_MMX2 || HAVE_AMD3DNOW +#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW } -#endif //HAVE_ALTIVEC +#endif //TEMPLATE_PP_ALTIVEC -#if !HAVE_ALTIVEC +#if !TEMPLATE_PP_ALTIVEC static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) { -#if HAVE_MMX2 || HAVE_AMD3DNOW +#if HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) DECLARE_ALIGNED(8, uint64_t, tmp)[3]; __asm__ volatile( "pxor %%mm6, %%mm6 \n\t" @@ -1060,7 +1109,7 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) #undef REAL_FIND_MIN_MAX #undef FIND_MIN_MAX -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT #define REAL_FIND_MIN_MAX(addr)\ "movq " #addr ", %%mm0 \n\t"\ "pminub %%mm0, %%mm7 \n\t"\ @@ -1087,7 +1136,7 @@ FIND_MIN_MAX((%0, %1, 8)) "movq %%mm7, %%mm4 \n\t" "psrlq $8, %%mm7 \n\t" -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT "pminub %%mm4, %%mm7 \n\t" // min of pixels "pshufw $0xF9, %%mm7, %%mm4 \n\t" "pminub %%mm4, %%mm7 \n\t" // min of pixels @@ -1112,7 +1161,7 @@ FIND_MIN_MAX((%0, %1, 8)) "movq %%mm6, %%mm4 \n\t" "psrlq $8, %%mm6 \n\t" -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT "pmaxub %%mm4, %%mm6 \n\t" // max of pixels "pshufw $0xF9, %%mm6, %%mm4 \n\t" "pmaxub %%mm4, %%mm6 \n\t" @@ -1266,7 +1315,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp) : "%"REG_a, "%"REG_d ); -#else //HAVE_MMX2 || HAVE_AMD3DNOW +#else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || 
TEMPLATE_PP_3DNOW) int y; int min=255; int max=0; @@ -1275,6 +1324,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, int s[10]; const int QP2= c->QP/2 + 1; + src --; for(y=1; y<9; y++){ int x; p= src + stride*y; @@ -1383,9 +1433,9 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, // src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255; } #endif -#endif //HAVE_MMX2 || HAVE_AMD3DNOW +#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW } -#endif //HAVE_ALTIVEC +#endif //TEMPLATE_PP_ALTIVEC /** * Deinterlace the given block by linearly interpolating every second line. @@ -1395,7 +1445,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1, */ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride) { -#if HAVE_MMX2 || HAVE_AMD3DNOW +#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW src+= 4*stride; __asm__ volatile( "lea (%0, %1), %%"REG_a" \n\t" @@ -1448,13 +1498,30 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid */ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) { -#if HAVE_MMX2 || HAVE_AMD3DNOW +#if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW src+= stride*3; __asm__ volatile( "lea (%0, %1), %%"REG_a" \n\t" "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" "lea (%%"REG_d", %1, 4), %%"REG_c" \n\t" "add %1, %%"REG_c" \n\t" +#if TEMPLATE_PP_SSE2 + "pxor %%xmm7, %%xmm7 \n\t" +#define REAL_DEINT_CUBIC(a,b,c,d,e)\ + "movq " #a ", %%xmm0 \n\t"\ + "movq " #b ", %%xmm1 \n\t"\ + "movq " #d ", %%xmm2 \n\t"\ + "movq " #e ", %%xmm3 \n\t"\ + "pavgb %%xmm2, %%xmm1 \n\t"\ + "pavgb %%xmm3, %%xmm0 \n\t"\ + "punpcklbw %%xmm7, %%xmm0 \n\t"\ + "punpcklbw %%xmm7, %%xmm1 \n\t"\ + "psubw %%xmm1, %%xmm0 \n\t"\ + "psraw $3, %%xmm0 \n\t"\ + "psubw %%xmm0, %%xmm1 \n\t"\ + "packuswb %%xmm1, %%xmm1 \n\t"\ + "movlps %%xmm1, " #c " \n\t" +#else //TEMPLATE_PP_SSE2 "pxor %%mm7, %%mm7 \n\t" // 0 1 2 3 4 5 6 7 8 9 10 // %0 
eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 ecx @@ -1465,7 +1532,7 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride "movq " #d ", %%mm2 \n\t"\ "movq " #e ", %%mm3 \n\t"\ PAVGB(%%mm2, %%mm1) /* (b+d) /2 */\ - PAVGB(%%mm3, %%mm0) /* a(a+e) /2 */\ + PAVGB(%%mm3, %%mm0) /* (a+e) /2 */\ "movq %%mm0, %%mm2 \n\t"\ "punpcklbw %%mm7, %%mm0 \n\t"\ "punpckhbw %%mm7, %%mm2 \n\t"\ @@ -1480,6 +1547,7 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride "psubw %%mm2, %%mm3 \n\t" /* H(9b + 9d - a - e)/16 */\ "packuswb %%mm3, %%mm1 \n\t"\ "movq %%mm1, " #c " \n\t" +#endif //TEMPLATE_PP_SSE2 #define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e) DEINT_CUBIC((%0) , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1)) @@ -1488,9 +1556,14 @@ DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc)) DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2)) : : "r" (src), "r" ((x86_reg)stride) - : "%"REG_a, "%"REG_d, "%"REG_c + : +#if TEMPLATE_PP_SSE2 + XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm7",) +#endif + "%"REG_a, "%"REG_d, "%"REG_c ); -#else //HAVE_MMX2 || HAVE_AMD3DNOW +#undef REAL_DEINT_CUBIC +#else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW int x; src+= stride*3; for(x=0; x<8; x++){ @@ -1500,7 +1573,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4); src++; } -#endif //HAVE_MMX2 || HAVE_AMD3DNOW +#endif //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW } /** @@ -1512,7 +1585,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, */ static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) { -#if HAVE_MMX2 || HAVE_AMD3DNOW +#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW src+= stride*4; __asm__ volatile( "lea (%0, %1), 
%%"REG_a" \n\t" @@ -1561,7 +1634,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) : : "r" (src), "r" ((x86_reg)stride), "r"(tmp) : "%"REG_a, "%"REG_d ); -#else //HAVE_MMX2 || HAVE_AMD3DNOW +#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW int x; src+= stride*4; for(x=0; x<8; x++){ @@ -1579,7 +1652,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) src++; } -#endif //HAVE_MMX2 || HAVE_AMD3DNOW +#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW } /** @@ -1591,7 +1664,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) */ static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2) { -#if HAVE_MMX2 || HAVE_AMD3DNOW +#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW src+= stride*4; __asm__ volatile( "lea (%0, %1), %%"REG_a" \n\t" @@ -1651,7 +1724,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2) : "%"REG_a, "%"REG_d ); -#else //HAVE_MMX2 || HAVE_AMD3DNOW +#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW int x; src+= stride*4; for(x=0; x<8; x++){ @@ -1680,7 +1753,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) src++; } -#endif //HAVE_MMX2 || HAVE_AMD3DNOW +#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW } /** @@ -1692,7 +1765,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4)) */ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp) { -#if HAVE_MMX2 || HAVE_AMD3DNOW +#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW src+= 4*stride; __asm__ volatile( "lea (%0, %1), %%"REG_a" \n\t" @@ -1739,7 +1812,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin : : "r" (src), "r" ((x86_reg)stride), "r" (tmp) : "%"REG_a, "%"REG_d ); -#else //HAVE_MMX2 || HAVE_AMD3DNOW +#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW int a, b, c, x; src+= 4*stride; @@ -1782,7 +1855,7 @@ static inline 
void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin src += 4; tmp += 4; } -#endif //HAVE_MMX2 || HAVE_AMD3DNOW +#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW } /** @@ -1793,9 +1866,9 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin */ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) { -#if HAVE_MMX +#if TEMPLATE_PP_MMX src+= 4*stride; -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT __asm__ volatile( "lea (%0, %1), %%"REG_a" \n\t" "lea (%%"REG_a", %1, 4), %%"REG_d" \n\t" @@ -1885,8 +1958,8 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) : : "r" (src), "r" ((x86_reg)stride) : "%"REG_a, "%"REG_d ); -#endif //HAVE_MMX2 -#else //HAVE_MMX +#endif //TEMPLATE_PP_MMXEXT +#else //TEMPLATE_PP_MMX int x, y; src+= 4*stride; // FIXME - there should be a way to do a few columns in parallel like w/mmx @@ -1905,10 +1978,10 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8)) } src++; } -#endif //HAVE_MMX +#endif //TEMPLATE_PP_MMX } -#if HAVE_MMX +#if TEMPLATE_PP_MMX /** * Transpose and shift the given 8x8 Block into dst1 and dst2. 
*/ @@ -2073,10 +2146,10 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src) : "%"REG_a, "%"REG_d ); } -#endif //HAVE_MMX +#endif //TEMPLATE_PP_MMX //static long test=0; -#if !HAVE_ALTIVEC +#if !TEMPLATE_PP_ALTIVEC static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise) { @@ -2087,7 +2160,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, #define FAST_L2_DIFF //#define L1_DIFF //u should change the thresholds too if u try that one -#if HAVE_MMX2 || HAVE_AMD3DNOW +#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW __asm__ volatile( "lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride "lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride @@ -2375,7 +2448,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc)) :: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast) : "%"REG_a, "%"REG_d, "%"REG_c, "memory" ); -#else //HAVE_MMX2 || HAVE_AMD3DNOW +#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW { int y; int d=0; @@ -2458,11 +2531,11 @@ Switch between } } } -#endif //HAVE_MMX2 || HAVE_AMD3DNOW +#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW } -#endif //HAVE_ALTIVEC +#endif //TEMPLATE_PP_ALTIVEC -#if HAVE_MMX +#if TEMPLATE_PP_MMX /** * accurate deblock filter */ @@ -2486,7 +2559,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "movq (%%"REG_a"), %%mm1 \n\t" "movq %%mm1, %%mm3 \n\t" "movq %%mm1, %%mm4 \n\t" - "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece + "psubb %%mm1, %%mm0 \n\t" // mm0 = difference "paddb %%mm7, %%mm0 \n\t" "pcmpgtb %%mm6, %%mm0 \n\t" @@ -2865,7 +2938,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "movq (%4), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "movq 8(%4), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT "movq %%mm7, %%mm6 \n\t" // 0 "psubw %%mm0, %%mm6 \n\t" "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| @@ -2897,7 
+2970,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| #endif -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT "pminsw %%mm2, %%mm0 \n\t" "pminsw %%mm3, %%mm1 \n\t" #else @@ -2961,7 +3034,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "pand %%mm2, %%mm4 \n\t" "pand %%mm3, %%mm5 \n\t" -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT "pminsw %%mm0, %%mm4 \n\t" "pminsw %%mm1, %%mm5 \n\t" #else @@ -2998,14 +3071,14 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st } } */ } -#endif //HAVE_MMX +#endif //TEMPLATE_PP_MMX static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c); /** * Copy a block from src to dst and fixes the blacklevel. - * levelFix == 0 -> do not touch the brighness & contrast + * levelFix == 0 -> do not touch the brightness & contrast */ #undef REAL_SCALED_CPY #undef SCALED_CPY @@ -3013,18 +3086,18 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride, int levelFix, int64_t *packedOffsetAndScale) { -#if !HAVE_MMX +#if !TEMPLATE_PP_MMX int i; #endif if(levelFix){ -#if HAVE_MMX +#if TEMPLATE_PP_MMX __asm__ volatile( "movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset "movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale "lea (%2,%4), %%"REG_a" \n\t" "lea (%3,%5), %%"REG_d" \n\t" "pxor %%mm4, %%mm4 \n\t" -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ "movq " #src1 ", %%mm0 \n\t"\ "movq " #src1 ", %%mm5 \n\t"\ @@ -3047,7 +3120,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t "movq %%mm0, " #dst1 " \n\t"\ "movq %%mm1, " #dst2 " \n\t"\ -#else //HAVE_MMX2 +#else //TEMPLATE_PP_MMXEXT #define 
REAL_SCALED_CPY(src1, src2, dst1, dst2) \ "movq " #src1 ", %%mm0 \n\t"\ "movq " #src1 ", %%mm5 \n\t"\ @@ -3074,7 +3147,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t "movq %%mm0, " #dst1 " \n\t"\ "movq %%mm1, " #dst2 " \n\t"\ -#endif //HAVE_MMX2 +#endif //TEMPLATE_PP_MMXEXT #define SCALED_CPY(src1, src2, dst1, dst2)\ REAL_SCALED_CPY(src1, src2, dst1, dst2) @@ -3094,13 +3167,13 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2)) "r" ((x86_reg)dstStride) : "%"REG_d ); -#else //HAVE_MMX +#else //TEMPLATE_PP_MMX for(i=0; i<8; i++) memcpy( &(dst[dstStride*i]), &(src[srcStride*i]), BLOCK_SIZE); -#endif //HAVE_MMX +#endif //TEMPLATE_PP_MMX }else{ -#if HAVE_MMX +#if TEMPLATE_PP_MMX __asm__ volatile( "lea (%0,%2), %%"REG_a" \n\t" "lea (%1,%3), %%"REG_d" \n\t" @@ -3127,11 +3200,11 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) "r" ((x86_reg)dstStride) : "%"REG_a, "%"REG_d ); -#else //HAVE_MMX +#else //TEMPLATE_PP_MMX for(i=0; i<8; i++) memcpy( &(dst[dstStride*i]), &(src[srcStride*i]), BLOCK_SIZE); -#endif //HAVE_MMX +#endif //TEMPLATE_PP_MMX } } @@ -3140,7 +3213,7 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2)) */ static inline void RENAME(duplicate)(uint8_t src[], int stride) { -#if HAVE_MMX +#if TEMPLATE_PP_MMX __asm__ volatile( "movq (%0), %%mm0 \n\t" "add %1, %0 \n\t" @@ -3168,8 +3241,8 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ { DECLARE_ALIGNED(8, PPContext, c)= *c2; //copy to stack for faster access int x,y; -#ifdef COMPILE_TIME_MODE - const int mode= COMPILE_TIME_MODE; +#ifdef TEMPLATE_PP_TIME_MODE + const int mode= TEMPLATE_PP_TIME_MODE; #else const int mode= isColor ? 
c.ppMode.chromMode : c.ppMode.lumMode; #endif @@ -3177,7 +3250,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ int QPCorrecture= 256*256; int copyAhead; -#if HAVE_MMX +#if TEMPLATE_PP_MMX int i; #endif @@ -3187,10 +3260,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ //FIXME remove uint64_t * const yHistogram= c.yHistogram; uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride; - uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride; + uint8_t * const tempDst= (dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride) + 32; //const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4; -#if HAVE_MMX +#if TEMPLATE_PP_MMX for(i=0; i<57; i++){ int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1; int threshold= offset*2 + 1; @@ -3225,7 +3298,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ c.frameNum++; // first frame is fscked so we ignore it - if(c.frameNum == 1) yHistogram[0]= width*height/64*15/256; + if(c.frameNum == 1) yHistogram[0]= width*(uint64_t)height/64*15/256; for(i=0; i<256; i++){ sum+= yHistogram[i]; @@ -3248,7 +3321,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black); -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT c.packedYScale= (uint16_t)(scale*256.0 + 0.5); c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF; #else @@ -3278,10 +3351,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ // From this point on it is guaranteed that we can read and write 16 lines downward // finish 1 block before the next otherwise we might have a problem - // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing + // with the L1 Cache of the P4 ... 
or only a few blocks at a time or something for(x=0; x<width; x+=BLOCK_SIZE){ -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT /* prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); @@ -3308,7 +3381,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ : "%"REG_a, "%"REG_d ); -#elif HAVE_AMD3DNOW +#elif TEMPLATE_PP_3DNOW //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... /* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32); prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); @@ -3354,7 +3427,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ //1% speedup if these are here instead of the inner loop const uint8_t *srcBlock= &(src[y*srcStride]); uint8_t *dstBlock= &(dst[y*dstStride]); -#if HAVE_MMX +#if TEMPLATE_PP_MMX uint8_t *tempBlock1= c.tempBlocks; uint8_t *tempBlock2= c.tempBlocks + 8; #endif @@ -3387,10 +3460,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ // From this point on it is guaranteed that we can read and write 16 lines downward // finish 1 block before the next otherwise we might have a problem - // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing + // with the L1 Cache of the P4 ... 
or only a few blocks at a time or something for(x=0; x<width; x+=BLOCK_SIZE){ const int stride= dstStride; -#if HAVE_MMX +#if TEMPLATE_PP_MMX uint8_t *tmpXchg; #endif if(isColor){ @@ -3404,7 +3477,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ yHistogram[ srcBlock[srcStride*12 + 4] ]++; } c.QP= QP; -#if HAVE_MMX +#if TEMPLATE_PP_MMX __asm__ volatile( "movd %1, %%mm7 \n\t" "packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP @@ -3417,7 +3490,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ #endif -#if HAVE_MMX2 +#if TEMPLATE_PP_MMXEXT /* prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32); prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32); @@ -3444,7 +3517,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ : "%"REG_a, "%"REG_d ); -#elif HAVE_AMD3DNOW +#elif TEMPLATE_PP_3DNOW //FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ... 
/* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32); prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32); @@ -3488,12 +3561,12 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ } } -#if HAVE_MMX +#if TEMPLATE_PP_MMX RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride); #endif /* check if we have a previous block to deblock it with dstBlock */ if(x - 8 >= 0){ -#if HAVE_MMX +#if TEMPLATE_PP_MMX if(mode & H_X1_FILTER) RENAME(vertX1Filter)(tempBlock1, 16, &c); else if(mode & H_DEBLOCK){ @@ -3514,7 +3587,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ if(mode & H_X1_FILTER) horizX1Filter(dstBlock-4, stride, QP); else if(mode & H_DEBLOCK){ -#if HAVE_ALTIVEC +#if TEMPLATE_PP_ALTIVEC DECLARE_ALIGNED(16, unsigned char, tempBlock)[272]; int t; transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride); @@ -3539,7 +3612,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ }else if(mode & H_A_DEBLOCK){ RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c); } -#endif //HAVE_MMX +#endif //TEMPLATE_PP_MMX if(mode & DERING){ //FIXME filter first line if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c); @@ -3549,7 +3622,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ { RENAME(tempNoiseReducer)(dstBlock-8, stride, c.tempBlurred[isColor] + y*dstStride + x, - c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3), + c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256, c.ppMode.maxTmpNoise); } } @@ -3557,7 +3630,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ dstBlock+=8; srcBlock+=8; -#if HAVE_MMX +#if TEMPLATE_PP_MMX tmpXchg= tempBlock1; tempBlock1= tempBlock2; tempBlock2 = tmpXchg; @@ -3571,7 +3644,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ if((mode & TEMP_NOISE_FILTER)){ RENAME(tempNoiseReducer)(dstBlock-8, dstStride, 
c.tempBlurred[isColor] + y*dstStride + x, - c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3), + c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256, c.ppMode.maxTmpNoise); } @@ -3597,9 +3670,9 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride]; }*/ } -#if HAVE_AMD3DNOW +#if TEMPLATE_PP_3DNOW __asm__ volatile("femms"); -#elif HAVE_MMX +#elif TEMPLATE_PP_MMX __asm__ volatile("emms"); #endif @@ -3629,3 +3702,11 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ *c2= c; //copy local context back } + +#undef RENAME +#undef TEMPLATE_PP_C +#undef TEMPLATE_PP_ALTIVEC +#undef TEMPLATE_PP_MMX +#undef TEMPLATE_PP_MMXEXT +#undef TEMPLATE_PP_3DNOW +#undef TEMPLATE_PP_SSE2 diff --git a/lib/ffmpeg/libpostproc/version.h b/lib/ffmpeg/libpostproc/version.h new file mode 100644 index 0000000000..d0d3d431f8 --- /dev/null +++ b/lib/ffmpeg/libpostproc/version.h @@ -0,0 +1,45 @@ +/* + * Version macros. + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef POSTPROC_POSTPROCESS_VERSION_H +#define POSTPROC_POSTPROCESS_VERSION_H + +/** + * @file + * Libpostproc version macros + */ + +#include "libavutil/avutil.h" + +#define LIBPOSTPROC_VERSION_MAJOR 52 +#define LIBPOSTPROC_VERSION_MINOR 2 +#define LIBPOSTPROC_VERSION_MICRO 100 + +#define LIBPOSTPROC_VERSION_INT AV_VERSION_INT(LIBPOSTPROC_VERSION_MAJOR, \ + LIBPOSTPROC_VERSION_MINOR, \ + LIBPOSTPROC_VERSION_MICRO) +#define LIBPOSTPROC_VERSION AV_VERSION(LIBPOSTPROC_VERSION_MAJOR, \ + LIBPOSTPROC_VERSION_MINOR, \ + LIBPOSTPROC_VERSION_MICRO) +#define LIBPOSTPROC_BUILD LIBPOSTPROC_VERSION_INT + +#define LIBPOSTPROC_IDENT "postproc" AV_STRINGIFY(LIBPOSTPROC_VERSION) + +#endif /* POSTPROC_POSTPROCESS_VERSION_H */ |