aboutsummaryrefslogtreecommitdiff
path: root/lib/ffmpeg/libpostproc
diff options
context:
space:
mode:
author FlyingRat <flyingrat@outlook.com> 2013-04-07 16:36:04 +0200
committer FlyingRat <flyingrat@outlook.com> 2013-04-07 16:36:04 +0200
commit 0e63a815aa6af63a21848e04b683d3f506dd41b1 (patch)
tree 002f61d8a5b1d294d99fd4ba5b6982d76a612f0c /lib/ffmpeg/libpostproc
parent 71862137c5337fc678681a23bfbc65f4db7a7b2f (diff)
[FFmpeg] version bump to n1.2 (rev e820e3a) - lib/ffmpeg
This commit now contains the original patches sub directory:
patches - Org dir that contains applied xbmc custom patches.
patches/README-patches - New README file with info about xbmc patches.
patches/obsolete-patches - New dir with obsolete xbmc patches.
Diffstat (limited to 'lib/ffmpeg/libpostproc')
-rw-r--r-- lib/ffmpeg/libpostproc/Makefile | 3
-rw-r--r-- lib/ffmpeg/libpostproc/postprocess.c | 229
-rw-r--r-- lib/ffmpeg/libpostproc/postprocess.h | 30
-rw-r--r-- lib/ffmpeg/libpostproc/postprocess_altivec_template.c | 2
-rw-r--r-- lib/ffmpeg/libpostproc/postprocess_internal.h | 24
-rw-r--r-- lib/ffmpeg/libpostproc/postprocess_template.c | 293
-rw-r--r-- lib/ffmpeg/libpostproc/version.h | 45
7 files changed, 358 insertions, 268 deletions
diff --git a/lib/ffmpeg/libpostproc/Makefile b/lib/ffmpeg/libpostproc/Makefile
index 6242157e71..3fb5a70806 100644
--- a/lib/ffmpeg/libpostproc/Makefile
+++ b/lib/ffmpeg/libpostproc/Makefile
@@ -3,6 +3,7 @@ include $(SUBDIR)../config.mak
NAME = postproc
FFLIBS = avutil
-HEADERS = postprocess.h
+HEADERS = postprocess.h \
+ version.h \
OBJS = postprocess.o
diff --git a/lib/ffmpeg/libpostproc/postprocess.c b/lib/ffmpeg/libpostproc/postprocess.c
index ed5c240012..facfd2cdd9 100644
--- a/lib/ffmpeg/libpostproc/postprocess.c
+++ b/lib/ffmpeg/libpostproc/postprocess.c
@@ -80,9 +80,9 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-//#undef HAVE_MMX2
-//#define HAVE_AMD3DNOW
-//#undef HAVE_MMX
+//#undef HAVE_MMXEXT_INLINE
+//#define HAVE_AMD3DNOW_INLINE
+//#undef HAVE_MMX_INLINE
//#undef ARCH_X86
//#define DEBUG_BRIGHTNESS
#include "postprocess.h"
@@ -116,7 +116,7 @@ const char *postproc_license(void)
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
-#if ARCH_X86
+#if ARCH_X86 && HAVE_INLINE_ASM
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
@@ -130,7 +130,7 @@ DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
-static struct PPFilter filters[]=
+static const struct PPFilter filters[]=
{
{"hb", "hdeblock", 1, 1, 3, H_DEBLOCK},
{"vb", "vdeblock", 1, 2, 4, V_DEBLOCK},
@@ -150,6 +150,7 @@ static struct PPFilter filters[]=
{"l5", "lowpass5", 1, 1, 4, LOWPASS5_DEINT_FILTER},
{"tn", "tmpnoise", 1, 7, 8, TEMP_NOISE_FILTER},
{"fq", "forcequant", 1, 0, 0, FORCE_QUANT},
+ {"be", "bitexact", 1, 0, 0, BITEXACT},
{NULL, NULL,0,0,0,0} //End Marker
};
@@ -164,7 +165,7 @@ static const char *replaceTable[]=
};
-#if ARCH_X86
+#if ARCH_X86 && HAVE_INLINE_ASM
static inline void prefetchnta(void *p)
{
__asm__ volatile( "prefetchnta (%0)\n\t"
@@ -200,7 +201,7 @@ static inline void prefetcht2(void *p)
/**
* Check if the given 8x8 Block is mostly "flat"
*/
-static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
+static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
{
int numEq= 0;
int y;
@@ -223,7 +224,7 @@ static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
/**
* Check if the middle 8x8 Block in the given 8x16 block is flat
*/
-static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
+static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
{
int numEq= 0;
int y;
@@ -245,7 +246,7 @@ static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
return numEq > c->ppMode.flatnessThreshold;
}
-static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
+static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
int i;
for(i=0; i<2; i++){
@@ -261,7 +262,7 @@ static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
return 1;
}
-static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
+static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
int x;
src+= stride*4;
@@ -274,7 +275,7 @@ static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
return 1;
}
-static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
+static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
{
if( isHorizDC_C(src, stride, c) ){
if( isHorizMinMaxOk_C(src, stride, c->QP) )
@@ -286,7 +287,7 @@ static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
}
}
-static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
+static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
{
if( isVertDC_C(src, stride, c) ){
if( isVertMinMaxOk_C(src, stride, c->QP) )
@@ -298,7 +299,7 @@ static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
}
}
-static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
+static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
{
int y;
for(y=0; y<BLOCK_SIZE; y++){
@@ -337,7 +338,7 @@ static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
* Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
*/
-static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
+static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
{
int y;
for(y=0; y<BLOCK_SIZE; y++){
@@ -380,11 +381,10 @@ static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
{
int y;
- static uint64_t *lut= NULL;
- if(lut==NULL)
+ static uint64_t lut[256];
+ if(!lut[255])
{
int i;
- lut = av_malloc(256*8);
for(i=0; i<256; i++)
{
int v= i < 128 ? 2*i : 2*(i-256);
@@ -435,7 +435,9 @@ static inline void horizX1Filter(uint8_t *src, int stride, int QP)
/**
* accurate deblock filter
*/
-static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
+static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
+ int stride, const PPContext *c)
+{
int y;
const int QP= c->QP;
const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
@@ -536,141 +538,86 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
//Plain C versions
-#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
-#define COMPILE_C
-#endif
-
-#if HAVE_ALTIVEC
-#define COMPILE_ALTIVEC
-#endif //HAVE_ALTIVEC
-
-#if ARCH_X86
-
-#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
-#define COMPILE_MMX
-#endif
-
-#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
-#define COMPILE_MMX2
-#endif
-
-#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
-#define COMPILE_3DNOW
-#endif
-#endif /* ARCH_X86 */
-
-#undef HAVE_MMX
-#define HAVE_MMX 0
-#undef HAVE_MMX2
-#define HAVE_MMX2 0
-#undef HAVE_AMD3DNOW
-#define HAVE_AMD3DNOW 0
-#undef HAVE_ALTIVEC
-#define HAVE_ALTIVEC 0
-
-#ifdef COMPILE_C
-#define RENAME(a) a ## _C
-#include "postprocess_template.c"
-#endif
-
-#ifdef COMPILE_ALTIVEC
-#undef RENAME
-#undef HAVE_ALTIVEC
-#define HAVE_ALTIVEC 1
-#define RENAME(a) a ## _altivec
-#include "postprocess_altivec_template.c"
+//we always compile C for testing which needs bitexactness
+#define TEMPLATE_PP_C 1
#include "postprocess_template.c"
-#endif
-
-//MMX versions
-#ifdef COMPILE_MMX
-#undef RENAME
-#undef HAVE_MMX
-#define HAVE_MMX 1
-#define RENAME(a) a ## _MMX
-#include "postprocess_template.c"
-#endif
-//MMX2 versions
-#ifdef COMPILE_MMX2
-#undef RENAME
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#define HAVE_MMX 1
-#define HAVE_MMX2 1
-#define RENAME(a) a ## _MMX2
-#include "postprocess_template.c"
+#if HAVE_ALTIVEC
+# define TEMPLATE_PP_ALTIVEC 1
+# include "postprocess_altivec_template.c"
+# include "postprocess_template.c"
#endif
-//3DNOW versions
-#ifdef COMPILE_3DNOW
-#undef RENAME
-#undef HAVE_MMX
-#undef HAVE_MMX2
-#undef HAVE_AMD3DNOW
-#define HAVE_MMX 1
-#define HAVE_MMX2 0
-#define HAVE_AMD3DNOW 1
-#define RENAME(a) a ## _3DNow
-#include "postprocess_template.c"
+#if ARCH_X86 && HAVE_INLINE_ASM
+# if CONFIG_RUNTIME_CPUDETECT
+# define TEMPLATE_PP_MMX 1
+# include "postprocess_template.c"
+# define TEMPLATE_PP_MMXEXT 1
+# include "postprocess_template.c"
+# define TEMPLATE_PP_3DNOW 1
+# include "postprocess_template.c"
+# define TEMPLATE_PP_SSE2 1
+# include "postprocess_template.c"
+# else
+# if HAVE_SSE2_INLINE
+# define TEMPLATE_PP_SSE2 1
+# include "postprocess_template.c"
+# elif HAVE_MMXEXT_INLINE
+# define TEMPLATE_PP_MMXEXT 1
+# include "postprocess_template.c"
+# elif HAVE_AMD3DNOW_INLINE
+# define TEMPLATE_PP_3DNOW 1
+# include "postprocess_template.c"
+# elif HAVE_MMX_INLINE
+# define TEMPLATE_PP_MMX 1
+# include "postprocess_template.c"
+# endif
+# endif
#endif
-// minor note: the HAVE_xyz is messed up after that line so do not use it.
+typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
+ const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
{
+ pp_fn pp = postProcess_C;
PPContext *c= (PPContext *)vc;
PPMode *ppMode= (PPMode *)vm;
c->ppMode= *ppMode; //FIXME
- // Using ifs here as they are faster than function pointers although the
- // difference would not be measurable here but it is much better because
- // someone might exchange the CPU whithout restarting MPlayer ;)
+ if (!(ppMode->lumMode & BITEXACT)) {
#if CONFIG_RUNTIME_CPUDETECT
-#if ARCH_X86
- // ordered per speed fastest first
- if(c->cpuCaps & PP_CPU_CAPS_MMX2)
- postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
- else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
- postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
- else if(c->cpuCaps & PP_CPU_CAPS_MMX)
- postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
- else
- postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#else
-#if HAVE_ALTIVEC
- if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
- postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
- else
-#endif
- postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+#if ARCH_X86 && HAVE_INLINE_ASM
+ // ordered per speed fastest first
+ if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
+ else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
+ else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
+ else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
+#elif HAVE_ALTIVEC
+ if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
#endif
#else /* CONFIG_RUNTIME_CPUDETECT */
-#if HAVE_MMX2
- postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#elif HAVE_AMD3DNOW
- postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#elif HAVE_MMX
- postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+#if HAVE_SSE2_INLINE
+ pp = postProcess_SSE2;
+#elif HAVE_MMXEXT_INLINE
+ pp = postProcess_MMX2;
+#elif HAVE_AMD3DNOW_INLINE
+ pp = postProcess_3DNow;
+#elif HAVE_MMX_INLINE
+ pp = postProcess_MMX;
#elif HAVE_ALTIVEC
- postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-#else
- postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+ pp = postProcess_altivec;
#endif
#endif /* !CONFIG_RUNTIME_CPUDETECT */
-}
+ }
-//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
-// QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
+ pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+}
/* -pp Command line Help
*/
-#if LIBPOSTPROC_VERSION_INT < (52<<16)
-const char *const pp_help=
-#else
const char pp_help[] =
-#endif
"Available postprocessing filters:\n"
"Filters Options\n"
"short long name short long option Description\n"
@@ -723,6 +670,20 @@ pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
struct PPMode *ppMode;
char *filterToken;
+ if (!name) {
+ av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
+ return NULL;
+ }
+
+ if (!strcmp(name, "help")) {
+ const char *p;
+ for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
+ av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
+ av_log(NULL, AV_LOG_INFO, "%s", temp);
+ }
+ return NULL;
+ }
+
ppMode= av_malloc(sizeof(PPMode));
ppMode->lumMode= 0;
@@ -906,7 +867,7 @@ static void reallocBuffers(PPContext *c, int width, int height, int stride, int
c->stride= stride;
c->qpStride= qpStride;
- reallocAlign((void **)&c->tempDst, 8, stride*24);
+ reallocAlign((void **)&c->tempDst, 8, stride*24+32);
reallocAlign((void **)&c->tempSrc, 8, stride*24);
reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
@@ -938,7 +899,6 @@ pp_context *pp_get_context(int width, int height, int cpuCaps){
memset(c, 0, sizeof(PPContext));
c->av_class = &av_codec_context_class;
- c->cpuCaps= cpuCaps;
if(cpuCaps&PP_FORMAT){
c->hChromaSubSample= cpuCaps&0x3;
c->vChromaSubSample= (cpuCaps>>4)&0x3;
@@ -946,6 +906,15 @@ pp_context *pp_get_context(int width, int height, int cpuCaps){
c->hChromaSubSample= 1;
c->vChromaSubSample= 1;
}
+ if (cpuCaps & PP_CPU_CAPS_AUTO) {
+ c->cpuCaps = av_get_cpu_flags();
+ } else {
+ c->cpuCaps = 0;
+ if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX;
+ if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
+ if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW;
+ if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
+ }
reallocBuffers(c, width, height, stride, qpStride);
diff --git a/lib/ffmpeg/libpostproc/postprocess.h b/lib/ffmpeg/libpostproc/postprocess.h
index c2c5c73240..928e01fe10 100644
--- a/lib/ffmpeg/libpostproc/postprocess.h
+++ b/lib/ffmpeg/libpostproc/postprocess.h
@@ -23,27 +23,16 @@
/**
* @file
- * @brief
- * external postprocessing API
+ * @ingroup lpp
+ * external API header
*/
-#include "libavutil/avutil.h"
-
-#ifndef LIBPOSTPROC_VERSION_MAJOR
-#define LIBPOSTPROC_VERSION_MAJOR 52
-#define LIBPOSTPROC_VERSION_MINOR 0
-#define LIBPOSTPROC_VERSION_MICRO 100
-#endif
-
-#define LIBPOSTPROC_VERSION_INT AV_VERSION_INT(LIBPOSTPROC_VERSION_MAJOR, \
- LIBPOSTPROC_VERSION_MINOR, \
- LIBPOSTPROC_VERSION_MICRO)
-#define LIBPOSTPROC_VERSION AV_VERSION(LIBPOSTPROC_VERSION_MAJOR, \
- LIBPOSTPROC_VERSION_MINOR, \
- LIBPOSTPROC_VERSION_MICRO)
-#define LIBPOSTPROC_BUILD LIBPOSTPROC_VERSION_INT
+/**
+ * @defgroup lpp Libpostproc
+ * @{
+ */
-#define LIBPOSTPROC_IDENT "postproc" AV_STRINGIFY(LIBPOSTPROC_VERSION)
+#include "libpostproc/version.h"
/**
* Return the LIBPOSTPROC_VERSION_INT constant.
@@ -100,6 +89,7 @@ void pp_free_context(pp_context *ppContext);
#define PP_CPU_CAPS_MMX2 0x20000000
#define PP_CPU_CAPS_3DNOW 0x40000000
#define PP_CPU_CAPS_ALTIVEC 0x10000000
+#define PP_CPU_CAPS_AUTO 0x00080000
#define PP_FORMAT 0x00000008
#define PP_FORMAT_420 (0x00000011|PP_FORMAT)
@@ -109,4 +99,8 @@ void pp_free_context(pp_context *ppContext);
#define PP_PICT_TYPE_QP2 0x00000010 ///< MPEG2 style QScale
+/**
+ * @}
+ */
+
#endif /* POSTPROC_POSTPROCESS_H */
diff --git a/lib/ffmpeg/libpostproc/postprocess_altivec_template.c b/lib/ffmpeg/libpostproc/postprocess_altivec_template.c
index 3a37562452..fa6ebe279d 100644
--- a/lib/ffmpeg/libpostproc/postprocess_altivec_template.c
+++ b/lib/ffmpeg/libpostproc/postprocess_altivec_template.c
@@ -825,7 +825,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
#define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a)
#define do_a_deblock_altivec(a...) do_a_deblock_C(a)
-static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
+static inline void tempNoiseReducer_altivec(uint8_t *src, int stride,
uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
{
const vector signed char neg1 = vec_splat_s8(-1);
diff --git a/lib/ffmpeg/libpostproc/postprocess_internal.h b/lib/ffmpeg/libpostproc/postprocess_internal.h
index 2256d78f78..b1b8f0c633 100644
--- a/lib/ffmpeg/libpostproc/postprocess_internal.h
+++ b/lib/ffmpeg/libpostproc/postprocess_internal.h
@@ -20,7 +20,7 @@
/**
* @file
- * internal api header.
+ * internal API header.
*/
#ifndef POSTPROC_POSTPROCESS_INTERNAL_H
@@ -28,6 +28,7 @@
#include <string.h>
#include "libavutil/avutil.h"
+#include "libavutil/intmath.h"
#include "libavutil/log.h"
#include "postprocess.h"
@@ -53,7 +54,7 @@
#define H_X1_FILTER 0x2000 // 8192
#define H_A_DEBLOCK 0x4000
-/// select between full y range (255-0) or standart one (234-16)
+/// select between full y range (255-0) or standard one (234-16)
#define FULL_Y_RANGE 0x8000 // 32768
//Deinterlacing Filters
@@ -67,6 +68,7 @@
#define TEMP_NOISE_FILTER 0x100000
#define FORCE_QUANT 0x200000
+#define BITEXACT 0x1000000
//use if you want a faster postprocessing code
//cannot differentiate between chroma & luma filters (both on or both off)
@@ -74,12 +76,10 @@
//filters on
//#define COMPILE_TIME_MODE 0x77
-static inline int CLIP(int a){
- if(a&256) return ((a)>>31)^(-1);
- else return a;
-}
+#define CLIP av_clip_uint8
+
/**
- * Postprocessng filter.
+ * Postprocessing filter.
*/
struct PPFilter{
const char *shortName;
@@ -91,15 +91,15 @@ struct PPFilter{
};
/**
- * Postprocessng mode.
+ * Postprocessing mode.
*/
typedef struct PPMode{
- int lumMode; ///< acivates filters for luminance
- int chromMode; ///< acivates filters for chrominance
+ int lumMode; ///< activates filters for luminance
+ int chromMode; ///< activates filters for chrominance
int error; ///< non zero on error
- int minAllowedY; ///< for brigtness correction
- int maxAllowedY; ///< for brihtness correction
+ int minAllowedY; ///< for brightness correction
+ int maxAllowedY; ///< for brightness correction
float maxClippedThreshold; ///< amount of "black" you are willing to lose to get a brightness-corrected picture
int maxTmpNoise[3]; ///< for Temporal Noise Reducing filter (Maximal sum of abs differences)
diff --git a/lib/ffmpeg/libpostproc/postprocess_template.c b/lib/ffmpeg/libpostproc/postprocess_template.c
index 4b8184c4f4..ad0404f76b 100644
--- a/lib/ffmpeg/libpostproc/postprocess_template.c
+++ b/lib/ffmpeg/libpostproc/postprocess_template.c
@@ -23,39 +23,88 @@
* mmx/mmx2/3dnow postprocess code.
*/
-#include "libavutil/x86_cpu.h"
+#include "libavutil/x86/asm.h"
+
+/* A single TEMPLATE_PP_* should be defined (to 1) when this template is
+ * included. The following macros will define its dependencies to 1 as well
+ * (like MMX2 depending on MMX), and will define to 0 all the others. Every
+ * TEMPLATE_PP_* need to be undef at the end. */
+
+#ifdef TEMPLATE_PP_C
+# define RENAME(a) a ## _C
+#else
+# define TEMPLATE_PP_C 0
+#endif
+
+#ifdef TEMPLATE_PP_ALTIVEC
+# define RENAME(a) a ## _altivec
+#else
+# define TEMPLATE_PP_ALTIVEC 0
+#endif
+
+#ifdef TEMPLATE_PP_MMX
+# define RENAME(a) a ## _MMX
+#else
+# define TEMPLATE_PP_MMX 0
+#endif
+
+#ifdef TEMPLATE_PP_MMXEXT
+# undef TEMPLATE_PP_MMX
+# define TEMPLATE_PP_MMX 1
+# define RENAME(a) a ## _MMX2
+#else
+# define TEMPLATE_PP_MMXEXT 0
+#endif
+
+#ifdef TEMPLATE_PP_3DNOW
+# undef TEMPLATE_PP_MMX
+# define TEMPLATE_PP_MMX 1
+# define RENAME(a) a ## _3DNow
+#else
+# define TEMPLATE_PP_3DNOW 0
+#endif
+
+#ifdef TEMPLATE_PP_SSE2
+# undef TEMPLATE_PP_MMX
+# define TEMPLATE_PP_MMX 1
+# undef TEMPLATE_PP_MMXEXT
+# define TEMPLATE_PP_MMXEXT 1
+# define RENAME(a) a ## _SSE2
+#else
+# define TEMPLATE_PP_SSE2 0
+#endif
#undef REAL_PAVGB
#undef PAVGB
#undef PMINUB
#undef PMAXUB
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
#define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
-#elif HAVE_AMD3DNOW
+#elif TEMPLATE_PP_3DNOW
#define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
#endif
#define PAVGB(a,b) REAL_PAVGB(a,b)
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
#define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
-#elif HAVE_MMX
+#elif TEMPLATE_PP_MMX
#define PMINUB(b,a,t) \
"movq " #a ", " #t " \n\t"\
"psubusb " #b ", " #t " \n\t"\
"psubb " #t ", " #a " \n\t"
#endif
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
#define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
-#elif HAVE_MMX
+#elif TEMPLATE_PP_MMX
#define PMAXUB(a,b) \
"psubusb " #a ", " #b " \n\t"\
"paddb " #a ", " #b " \n\t"
#endif
//FIXME? |255-0| = 1 (should not be a problem ...)
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
/**
* Check if the middle 8x8 Block in the given 8x16 block is flat
*/
@@ -79,7 +128,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
"movq %%mm0, %%mm4 \n\t"
PMAXUB(%%mm1, %%mm4)
PMINUB(%%mm1, %%mm3, %%mm5)
- "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece
+ "psubb %%mm1, %%mm0 \n\t" // mm0 = difference
"paddb %%mm7, %%mm0 \n\t"
"pcmpgtb %%mm6, %%mm0 \n\t"
@@ -135,7 +184,7 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
"psubusb %%mm3, %%mm4 \n\t"
" \n\t"
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
"pxor %%mm7, %%mm7 \n\t"
"psadbw %%mm7, %%mm0 \n\t"
#else
@@ -169,16 +218,16 @@ static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
return 2;
}
}
-#endif //HAVE_MMX
+#endif //TEMPLATE_PP_MMX
/**
* Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16
*/
-#if !HAVE_ALTIVEC
+#if !TEMPLATE_PP_ALTIVEC
static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
+#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
src+= stride*3;
__asm__ volatile( //"movv %0 %1 %2\n\t"
"movq %2, %%mm0 \n\t" // QP,..., QP
@@ -305,7 +354,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
: "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb)
: "%"REG_a, "%"REG_c
);
-#else //HAVE_MMX2 || HAVE_AMD3DNOW
+#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
const int l1= stride;
const int l2= stride + l1;
const int l3= stride + l2;
@@ -344,9 +393,9 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
src++;
}
-#endif //HAVE_MMX2 || HAVE_AMD3DNOW
+#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
}
-#endif //HAVE_ALTIVEC
+#endif //TEMPLATE_PP_ALTIVEC
/**
* Experimental Filter 1
@@ -357,7 +406,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
*/
static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
+#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
src+= stride*3;
__asm__ volatile(
@@ -443,7 +492,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
: "r" (src), "r" ((x86_reg)stride), "m" (co->pQPb)
: "%"REG_a, "%"REG_c
);
-#else //HAVE_MMX2 || HAVE_AMD3DNOW
+#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
const int l1= stride;
const int l2= stride + l1;
@@ -477,13 +526,13 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
}
src++;
}
-#endif //HAVE_MMX2 || HAVE_AMD3DNOW
+#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
}
-#if !HAVE_ALTIVEC
+#if !TEMPLATE_PP_ALTIVEC
static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c)
{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
+#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
/*
uint8_t tmp[16];
const int l1= stride;
@@ -764,7 +813,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
}
}
*/
-#elif HAVE_MMX
+#elif TEMPLATE_PP_MMX
DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars
src+= stride*4;
__asm__ volatile(
@@ -872,7 +921,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"movq (%3), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
"movq 8(%3), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
"movq %%mm7, %%mm6 \n\t" // 0
"psubw %%mm0, %%mm6 \n\t"
"pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
@@ -904,7 +953,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
#endif
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
"pminsw %%mm2, %%mm0 \n\t"
"pminsw %%mm3, %%mm1 \n\t"
#else
@@ -968,7 +1017,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"pand %%mm2, %%mm4 \n\t"
"pand %%mm3, %%mm5 \n\t"
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
"pminsw %%mm0, %%mm4 \n\t"
"pminsw %%mm1, %%mm5 \n\t"
#else
@@ -995,7 +1044,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
: "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp)
: "%"REG_a
);
-#else //HAVE_MMX2 || HAVE_AMD3DNOW
+#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
const int l1= stride;
const int l2= stride + l1;
const int l3= stride + l2;
@@ -1033,14 +1082,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
}
src++;
}
-#endif //HAVE_MMX2 || HAVE_AMD3DNOW
+#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
}
-#endif //HAVE_ALTIVEC
+#endif //TEMPLATE_PP_ALTIVEC
-#if !HAVE_ALTIVEC
+#if !TEMPLATE_PP_ALTIVEC
static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
+#if HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW)
DECLARE_ALIGNED(8, uint64_t, tmp)[3];
__asm__ volatile(
"pxor %%mm6, %%mm6 \n\t"
@@ -1060,7 +1109,7 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
#undef REAL_FIND_MIN_MAX
#undef FIND_MIN_MAX
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
#define REAL_FIND_MIN_MAX(addr)\
"movq " #addr ", %%mm0 \n\t"\
"pminub %%mm0, %%mm7 \n\t"\
@@ -1087,7 +1136,7 @@ FIND_MIN_MAX((%0, %1, 8))
"movq %%mm7, %%mm4 \n\t"
"psrlq $8, %%mm7 \n\t"
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
"pminub %%mm4, %%mm7 \n\t" // min of pixels
"pshufw $0xF9, %%mm7, %%mm4 \n\t"
"pminub %%mm4, %%mm7 \n\t" // min of pixels
@@ -1112,7 +1161,7 @@ FIND_MIN_MAX((%0, %1, 8))
"movq %%mm6, %%mm4 \n\t"
"psrlq $8, %%mm6 \n\t"
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
"pmaxub %%mm4, %%mm6 \n\t" // max of pixels
"pshufw $0xF9, %%mm6, %%mm4 \n\t"
"pmaxub %%mm4, %%mm6 \n\t"
@@ -1266,7 +1315,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
: : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "q"(tmp)
: "%"REG_a, "%"REG_d
);
-#else //HAVE_MMX2 || HAVE_AMD3DNOW
+#else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW)
int y;
int min=255;
int max=0;
@@ -1275,6 +1324,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
int s[10];
const int QP2= c->QP/2 + 1;
+ src --;
for(y=1; y<9; y++){
int x;
p= src + stride*y;
@@ -1383,9 +1433,9 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
// src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
}
#endif
-#endif //HAVE_MMX2 || HAVE_AMD3DNOW
+#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
}
-#endif //HAVE_ALTIVEC
+#endif //TEMPLATE_PP_ALTIVEC
/**
* Deinterlace the given block by linearly interpolating every second line.
@@ -1395,7 +1445,7 @@ DERING_CORE((%0, %1, 8) ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,
*/
static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
+#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
src+= 4*stride;
__asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
@@ -1448,13 +1498,30 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
*/
static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
+#if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
src+= stride*3;
__asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
"lea (%%"REG_d", %1, 4), %%"REG_c" \n\t"
"add %1, %%"REG_c" \n\t"
+#if TEMPLATE_PP_SSE2
+ "pxor %%xmm7, %%xmm7 \n\t"
+#define REAL_DEINT_CUBIC(a,b,c,d,e)\
+ "movq " #a ", %%xmm0 \n\t"\
+ "movq " #b ", %%xmm1 \n\t"\
+ "movq " #d ", %%xmm2 \n\t"\
+ "movq " #e ", %%xmm3 \n\t"\
+ "pavgb %%xmm2, %%xmm1 \n\t"\
+ "pavgb %%xmm3, %%xmm0 \n\t"\
+ "punpcklbw %%xmm7, %%xmm0 \n\t"\
+ "punpcklbw %%xmm7, %%xmm1 \n\t"\
+ "psubw %%xmm1, %%xmm0 \n\t"\
+ "psraw $3, %%xmm0 \n\t"\
+ "psubw %%xmm0, %%xmm1 \n\t"\
+ "packuswb %%xmm1, %%xmm1 \n\t"\
+ "movlps %%xmm1, " #c " \n\t"
+#else //TEMPLATE_PP_SSE2
"pxor %%mm7, %%mm7 \n\t"
// 0 1 2 3 4 5 6 7 8 9 10
// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 ecx
@@ -1465,7 +1532,7 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride
"movq " #d ", %%mm2 \n\t"\
"movq " #e ", %%mm3 \n\t"\
PAVGB(%%mm2, %%mm1) /* (b+d) /2 */\
- PAVGB(%%mm3, %%mm0) /* a(a+e) /2 */\
+ PAVGB(%%mm3, %%mm0) /* (a+e) /2 */\
"movq %%mm0, %%mm2 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpckhbw %%mm7, %%mm2 \n\t"\
@@ -1480,6 +1547,7 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride
"psubw %%mm2, %%mm3 \n\t" /* H(9b + 9d - a - e)/16 */\
"packuswb %%mm3, %%mm1 \n\t"\
"movq %%mm1, " #c " \n\t"
+#endif //TEMPLATE_PP_SSE2
#define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e)
DEINT_CUBIC((%0) , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1))
@@ -1488,9 +1556,14 @@ DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc))
DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc, %1, 2))
: : "r" (src), "r" ((x86_reg)stride)
- : "%"REG_a, "%"REG_d, "%"REG_c
+ :
+#if TEMPLATE_PP_SSE2
+ XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm7",)
+#endif
+ "%"REG_a, "%"REG_d, "%"REG_c
);
-#else //HAVE_MMX2 || HAVE_AMD3DNOW
+#undef REAL_DEINT_CUBIC
+#else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
int x;
src+= stride*3;
for(x=0; x<8; x++){
@@ -1500,7 +1573,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc,
src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
src++;
}
-#endif //HAVE_MMX2 || HAVE_AMD3DNOW
+#endif //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
}
/**
@@ -1512,7 +1585,7 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc) , (%%REGc,
*/
static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
+#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
src+= stride*4;
__asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
@@ -1561,7 +1634,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
: : "r" (src), "r" ((x86_reg)stride), "r"(tmp)
: "%"REG_a, "%"REG_d
);
-#else //HAVE_MMX2 || HAVE_AMD3DNOW
+#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
int x;
src+= stride*4;
for(x=0; x<8; x++){
@@ -1579,7 +1652,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
src++;
}
-#endif //HAVE_MMX2 || HAVE_AMD3DNOW
+#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
}
/**
@@ -1591,7 +1664,7 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
*/
static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
+#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
src+= stride*4;
__asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
@@ -1651,7 +1724,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
: : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2)
: "%"REG_a, "%"REG_d
);
-#else //HAVE_MMX2 || HAVE_AMD3DNOW
+#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
int x;
src+= stride*4;
for(x=0; x<8; x++){
@@ -1680,7 +1753,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
src++;
}
-#endif //HAVE_MMX2 || HAVE_AMD3DNOW
+#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
}
/**
@@ -1692,7 +1765,7 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
*/
static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
{
-#if HAVE_MMX2 || HAVE_AMD3DNOW
+#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
src+= 4*stride;
__asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
@@ -1739,7 +1812,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
: : "r" (src), "r" ((x86_reg)stride), "r" (tmp)
: "%"REG_a, "%"REG_d
);
-#else //HAVE_MMX2 || HAVE_AMD3DNOW
+#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
int a, b, c, x;
src+= 4*stride;
@@ -1782,7 +1855,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
src += 4;
tmp += 4;
}
-#endif //HAVE_MMX2 || HAVE_AMD3DNOW
+#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
}
/**
@@ -1793,9 +1866,9 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
*/
static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
{
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
src+= 4*stride;
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
__asm__ volatile(
"lea (%0, %1), %%"REG_a" \n\t"
"lea (%%"REG_a", %1, 4), %%"REG_d" \n\t"
@@ -1885,8 +1958,8 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
: : "r" (src), "r" ((x86_reg)stride)
: "%"REG_a, "%"REG_d
);
-#endif //HAVE_MMX2
-#else //HAVE_MMX
+#endif //TEMPLATE_PP_MMXEXT
+#else //TEMPLATE_PP_MMX
int x, y;
src+= 4*stride;
// FIXME - there should be a way to do a few columns in parallel like w/mmx
@@ -1905,10 +1978,10 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
}
src++;
}
-#endif //HAVE_MMX
+#endif //TEMPLATE_PP_MMX
}
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
/**
* Transpose and shift the given 8x8 Block into dst1 and dst2.
*/
@@ -2073,10 +2146,10 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
: "%"REG_a, "%"REG_d
);
}
-#endif //HAVE_MMX
+#endif //TEMPLATE_PP_MMX
//static long test=0;
-#if !HAVE_ALTIVEC
+#if !TEMPLATE_PP_ALTIVEC
static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
{
@@ -2087,7 +2160,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
#define FAST_L2_DIFF
//#define L1_DIFF //u should change the thresholds too if u try that one
-#if HAVE_MMX2 || HAVE_AMD3DNOW
+#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
__asm__ volatile(
"lea (%2, %2, 2), %%"REG_a" \n\t" // 3*stride
"lea (%2, %2, 4), %%"REG_d" \n\t" // 5*stride
@@ -2375,7 +2448,7 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
:: "r" (src), "r" (tempBlurred), "r"((x86_reg)stride), "m" (tempBlurredPast)
: "%"REG_a, "%"REG_d, "%"REG_c, "memory"
);
-#else //HAVE_MMX2 || HAVE_AMD3DNOW
+#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
{
int y;
int d=0;
@@ -2458,11 +2531,11 @@ Switch between
}
}
}
-#endif //HAVE_MMX2 || HAVE_AMD3DNOW
+#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
}
-#endif //HAVE_ALTIVEC
+#endif //!TEMPLATE_PP_ALTIVEC
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
/**
* accurate deblock filter
*/
@@ -2486,7 +2559,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"movq (%%"REG_a"), %%mm1 \n\t"
"movq %%mm1, %%mm3 \n\t"
"movq %%mm1, %%mm4 \n\t"
- "psubb %%mm1, %%mm0 \n\t" // mm0 = differnece
+ "psubb %%mm1, %%mm0 \n\t" // mm0 = difference
"paddb %%mm7, %%mm0 \n\t"
"pcmpgtb %%mm6, %%mm0 \n\t"
@@ -2865,7 +2938,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"movq (%4), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3
"movq 8(%4), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
"movq %%mm7, %%mm6 \n\t" // 0
"psubw %%mm0, %%mm6 \n\t"
"pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
@@ -2897,7 +2970,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
#endif
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
"pminsw %%mm2, %%mm0 \n\t"
"pminsw %%mm3, %%mm1 \n\t"
#else
@@ -2961,7 +3034,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"pand %%mm2, %%mm4 \n\t"
"pand %%mm3, %%mm5 \n\t"
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
"pminsw %%mm0, %%mm4 \n\t"
"pminsw %%mm1, %%mm5 \n\t"
#else
@@ -2998,14 +3071,14 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
}
} */
}
-#endif //HAVE_MMX
+#endif //TEMPLATE_PP_MMX
static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
/**
* Copy a block from src to dst and fixes the blacklevel.
- * levelFix == 0 -> do not touch the brighness & contrast
+ * levelFix == 0 -> do not touch the brightness & contrast
*/
#undef REAL_SCALED_CPY
#undef SCALED_CPY
@@ -3013,18 +3086,18 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride,
int levelFix, int64_t *packedOffsetAndScale)
{
-#if !HAVE_MMX
+#if !TEMPLATE_PP_MMX
int i;
#endif
if(levelFix){
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
__asm__ volatile(
"movq (%%"REG_a"), %%mm2 \n\t" // packedYOffset
"movq 8(%%"REG_a"), %%mm3 \n\t" // packedYScale
"lea (%2,%4), %%"REG_a" \n\t"
"lea (%3,%5), %%"REG_d" \n\t"
"pxor %%mm4, %%mm4 \n\t"
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
#define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
"movq " #src1 ", %%mm0 \n\t"\
"movq " #src1 ", %%mm5 \n\t"\
@@ -3047,7 +3120,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
"movq %%mm0, " #dst1 " \n\t"\
"movq %%mm1, " #dst2 " \n\t"\
-#else //HAVE_MMX2
+#else //TEMPLATE_PP_MMXEXT
#define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
"movq " #src1 ", %%mm0 \n\t"\
"movq " #src1 ", %%mm5 \n\t"\
@@ -3074,7 +3147,7 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
"movq %%mm0, " #dst1 " \n\t"\
"movq %%mm1, " #dst2 " \n\t"\
-#endif //HAVE_MMX2
+#endif //TEMPLATE_PP_MMXEXT
#define SCALED_CPY(src1, src2, dst1, dst2)\
REAL_SCALED_CPY(src1, src2, dst1, dst2)
@@ -3094,13 +3167,13 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
"r" ((x86_reg)dstStride)
: "%"REG_d
);
-#else //HAVE_MMX
+#else //TEMPLATE_PP_MMX
for(i=0; i<8; i++)
memcpy( &(dst[dstStride*i]),
&(src[srcStride*i]), BLOCK_SIZE);
-#endif //HAVE_MMX
+#endif //TEMPLATE_PP_MMX
}else{
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
__asm__ volatile(
"lea (%0,%2), %%"REG_a" \n\t"
"lea (%1,%3), %%"REG_d" \n\t"
@@ -3127,11 +3200,11 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
"r" ((x86_reg)dstStride)
: "%"REG_a, "%"REG_d
);
-#else //HAVE_MMX
+#else //TEMPLATE_PP_MMX
for(i=0; i<8; i++)
memcpy( &(dst[dstStride*i]),
&(src[srcStride*i]), BLOCK_SIZE);
-#endif //HAVE_MMX
+#endif //TEMPLATE_PP_MMX
}
}
@@ -3140,7 +3213,7 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
*/
static inline void RENAME(duplicate)(uint8_t src[], int stride)
{
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
__asm__ volatile(
"movq (%0), %%mm0 \n\t"
"add %1, %0 \n\t"
@@ -3168,8 +3241,8 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
{
DECLARE_ALIGNED(8, PPContext, c)= *c2; //copy to stack for faster access
int x,y;
-#ifdef COMPILE_TIME_MODE
- const int mode= COMPILE_TIME_MODE;
+#ifdef TEMPLATE_PP_TIME_MODE
+ const int mode= TEMPLATE_PP_TIME_MODE;
#else
const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
#endif
@@ -3177,7 +3250,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
int QPCorrecture= 256*256;
int copyAhead;
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
int i;
#endif
@@ -3187,10 +3260,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
//FIXME remove
uint64_t * const yHistogram= c.yHistogram;
uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
- uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
+ uint8_t * const tempDst= (dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride) + 32;
//const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
for(i=0; i<57; i++){
int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
int threshold= offset*2 + 1;
@@ -3225,7 +3298,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
c.frameNum++;
// first frame is fscked so we ignore it
- if(c.frameNum == 1) yHistogram[0]= width*height/64*15/256;
+ if(c.frameNum == 1) yHistogram[0]= width*(uint64_t)height/64*15/256;
for(i=0; i<256; i++){
sum+= yHistogram[i];
@@ -3248,7 +3321,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
#else
@@ -3278,10 +3351,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
// From this point on it is guaranteed that we can read and write 16 lines downward
// finish 1 block before the next otherwise we might have a problem
- // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
+ // with the L1 Cache of the P4 ... or only a few blocks at a time or something
for(x=0; x<width; x+=BLOCK_SIZE){
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
/*
prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
@@ -3308,7 +3381,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
: "%"REG_a, "%"REG_d
);
-#elif HAVE_AMD3DNOW
+#elif TEMPLATE_PP_3DNOW
//FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
/* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
@@ -3354,7 +3427,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
//1% speedup if these are here instead of the inner loop
const uint8_t *srcBlock= &(src[y*srcStride]);
uint8_t *dstBlock= &(dst[y*dstStride]);
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
uint8_t *tempBlock1= c.tempBlocks;
uint8_t *tempBlock2= c.tempBlocks + 8;
#endif
@@ -3387,10 +3460,10 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
// From this point on it is guaranteed that we can read and write 16 lines downward
// finish 1 block before the next otherwise we might have a problem
- // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
+ // with the L1 Cache of the P4 ... or only a few blocks at a time or something
for(x=0; x<width; x+=BLOCK_SIZE){
const int stride= dstStride;
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
uint8_t *tmpXchg;
#endif
if(isColor){
@@ -3404,7 +3477,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
yHistogram[ srcBlock[srcStride*12 + 4] ]++;
}
c.QP= QP;
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
__asm__ volatile(
"movd %1, %%mm7 \n\t"
"packuswb %%mm7, %%mm7 \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
@@ -3417,7 +3490,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
#endif
-#if HAVE_MMX2
+#if TEMPLATE_PP_MMXEXT
/*
prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
@@ -3444,7 +3517,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
: "%"REG_a, "%"REG_d
);
-#elif HAVE_AMD3DNOW
+#elif TEMPLATE_PP_3DNOW
//FIXME check if this is faster on an 3dnow chip or if it is faster without the prefetch or ...
/* prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
@@ -3488,12 +3561,12 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
}
}
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
#endif
/* check if we have a previous block to deblock it with dstBlock */
if(x - 8 >= 0){
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
if(mode & H_X1_FILTER)
RENAME(vertX1Filter)(tempBlock1, 16, &c);
else if(mode & H_DEBLOCK){
@@ -3514,7 +3587,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
if(mode & H_X1_FILTER)
horizX1Filter(dstBlock-4, stride, QP);
else if(mode & H_DEBLOCK){
-#if HAVE_ALTIVEC
+#if TEMPLATE_PP_ALTIVEC
DECLARE_ALIGNED(16, unsigned char, tempBlock)[272];
int t;
transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
@@ -3539,7 +3612,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
}else if(mode & H_A_DEBLOCK){
RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
}
-#endif //HAVE_MMX
+#endif //TEMPLATE_PP_MMX
if(mode & DERING){
//FIXME filter first line
if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
@@ -3549,7 +3622,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
{
RENAME(tempNoiseReducer)(dstBlock-8, stride,
c.tempBlurred[isColor] + y*dstStride + x,
- c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3),
+ c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
c.ppMode.maxTmpNoise);
}
}
@@ -3557,7 +3630,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
dstBlock+=8;
srcBlock+=8;
-#if HAVE_MMX
+#if TEMPLATE_PP_MMX
tmpXchg= tempBlock1;
tempBlock1= tempBlock2;
tempBlock2 = tmpXchg;
@@ -3571,7 +3644,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
if((mode & TEMP_NOISE_FILTER)){
RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
c.tempBlurred[isColor] + y*dstStride + x,
- c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3),
+ c.tempBlurredPast[isColor] + (y>>3)*256 + (x>>3) + 256,
c.ppMode.maxTmpNoise);
}
@@ -3597,9 +3670,9 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
+ dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
}*/
}
-#if HAVE_AMD3DNOW
+#if TEMPLATE_PP_3DNOW
__asm__ volatile("femms");
-#elif HAVE_MMX
+#elif TEMPLATE_PP_MMX
__asm__ volatile("emms");
#endif
@@ -3629,3 +3702,11 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[
*c2= c; //copy local context back
}
+
+#undef RENAME
+#undef TEMPLATE_PP_C
+#undef TEMPLATE_PP_ALTIVEC
+#undef TEMPLATE_PP_MMX
+#undef TEMPLATE_PP_MMXEXT
+#undef TEMPLATE_PP_3DNOW
+#undef TEMPLATE_PP_SSE2
diff --git a/lib/ffmpeg/libpostproc/version.h b/lib/ffmpeg/libpostproc/version.h
new file mode 100644
index 0000000000..d0d3d431f8
--- /dev/null
+++ b/lib/ffmpeg/libpostproc/version.h
@@ -0,0 +1,45 @@
+/*
+ * Version macros for libpostproc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef POSTPROC_POSTPROCESS_VERSION_H
+#define POSTPROC_POSTPROCESS_VERSION_H
+
+/**
+ * @file
+ * Libpostproc version macros: the major/minor/micro numbers and the values derived from them.
+ */
+
+#include "libavutil/avutil.h"
+
+#define LIBPOSTPROC_VERSION_MAJOR 52
+#define LIBPOSTPROC_VERSION_MINOR 2
+#define LIBPOSTPROC_VERSION_MICRO 100
+
+#define LIBPOSTPROC_VERSION_INT AV_VERSION_INT(LIBPOSTPROC_VERSION_MAJOR, \
+ LIBPOSTPROC_VERSION_MINOR, \
+ LIBPOSTPROC_VERSION_MICRO) /* major, minor, micro passed through AV_VERSION_INT */
+#define LIBPOSTPROC_VERSION AV_VERSION(LIBPOSTPROC_VERSION_MAJOR, \
+ LIBPOSTPROC_VERSION_MINOR, \
+ LIBPOSTPROC_VERSION_MICRO) /* same three numbers passed through AV_VERSION */
+#define LIBPOSTPROC_BUILD LIBPOSTPROC_VERSION_INT /* alias: expands to LIBPOSTPROC_VERSION_INT */
+
+#define LIBPOSTPROC_IDENT "postproc" AV_STRINGIFY(LIBPOSTPROC_VERSION) /* "postproc" concatenated with AV_STRINGIFY of the version */
+
+#endif /* POSTPROC_POSTPROCESS_VERSION_H */