diff options
Diffstat (limited to 'tools/depends/target/libmpeg2/02-neon.patch')
-rw-r--r-- | tools/depends/target/libmpeg2/02-neon.patch | 385 |
1 files changed, 385 insertions, 0 deletions
diff --git a/tools/depends/target/libmpeg2/02-neon.patch b/tools/depends/target/libmpeg2/02-neon.patch new file mode 100644 index 0000000000..329551b65e --- /dev/null +++ b/tools/depends/target/libmpeg2/02-neon.patch @@ -0,0 +1,385 @@ +Index: include/mpeg2.h +=================================================================== +--- include/mpeg2.h (révision 1193) ++++ include/mpeg2.h (copie de travail) +@@ -164,6 +164,7 @@ + #define MPEG2_ACCEL_SPARC_VIS 1 + #define MPEG2_ACCEL_SPARC_VIS2 2 + #define MPEG2_ACCEL_ARM 1 ++#define MPEG2_ACCEL_ARM_NEON 2 + #define MPEG2_ACCEL_DETECT 0x80000000 + + uint32_t mpeg2_accel (uint32_t accel); +Index: libmpeg2/motion_comp_neon.c +=================================================================== +--- libmpeg2/motion_comp_neon.c (révision 0) ++++ libmpeg2/motion_comp_neon.c (révision 0) +@@ -0,0 +1,302 @@ ++/* ++ * motion_comp_neon.c ++ * Copyright (C) 2009 Rémi Denis-Courmont ++ * ++ * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. ++ * See http://libmpeg2.sourceforge.net/ for updates. ++ * ++ * mpeg2dec is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * mpeg2dec is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with mpeg2dec; if not, write to the Free Software Foundation, Inc., ++ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ++ */ ++ ++#include "config.h" ++ ++#if defined(ARCH_ARM) ++ ++#include <stdint.h> ++#include <string.h> ++ ++#include "mpeg2.h" ++#include "attributes.h" ++#include "mpeg2_internal.h" ++ ++/* dest = ref */ ++static void MC_put_o_16_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ do { ++ memcpy (dest, ref, 16); ++ ref += stride; ++ dest += stride; ++ } while (--height); ++} ++ ++static void MC_put_o_8_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ do { ++ memcpy (dest, ref, 8); ++ ref += stride; ++ dest += stride; ++ } while (--height); ++} ++ ++/* dest = (src1 + src2 + 1) / 2 */ ++static void MC_avg_1_16_neon (uint8_t * dest, const uint8_t * src1, ++ const uint8_t * src2, ++ const int stride, unsigned height) ++{ ++ do { ++ asm volatile ( ++ "vld1.u8 {q0}, [%[src1]]\n" ++ "vld1.u8 {q1}, [%[src2]]\n" ++ "vrhadd.u8 q0, q0, q1\n" ++ /* XXX: three cycles stall */ ++ "vst1.u8 {q0}, [%[dest]]\n" ++ : ++ : [dest]"r"(dest), [src1]"r"(src1), [src2]"r"(src2) ++ : "memory", "q0", "q1"); ++ src1 += stride; ++ src2 += stride; ++ dest += stride; ++ } while (--height); ++} ++ ++static void MC_avg_1_8_neon (uint8_t * dest, const uint8_t * src1, ++ const uint8_t * src2, ++ const int stride, unsigned height) ++{ ++ do { ++ asm volatile ( ++ "vld1.u8 {d0}, [%[src1]]\n" ++ "vld1.u8 {d1}, [%[src2]]\n" ++ "vrhadd.u8 d0, d0, d1\n" ++ "vst1.u8 {d0}, [%[dest]]\n" ++ : ++ : [dest]"r"(dest), [src1]"r"(src1), [src2]"r"(src2) ++ : "memory", "q0"); ++ ++ src1 += stride; ++ src2 += stride; ++ dest += stride; ++ } while (--height); ++} ++ ++/* dest = (dest + ((src1 + src2 + 1) / 2) + 1) / 2 */ ++static void MC_avg_2_16_neon (uint8_t * dest, const uint8_t * src1, ++ const uint8_t * src2, ++ const int stride, unsigned height) ++{ ++ do { ++ asm volatile ( ++ "vld1.u8 {q0}, [%[src1]]\n" ++ "vld1.u8 {q1}, [%[src2]]\n" ++ "vrhadd.u8 q0, q0, q1\n" ++ "vld1.u8 {q2}, [%[dest]]\n" ++ /* XXX: one cycle stall */ ++ "vrhadd.u8 q0, q0, q2\n" ++ /* XXX: three cycles stall */ ++ "vst1.u8 {q0}, [%[dest]]\n" ++ : ++ : [dest]"r"(dest), [src1]"r"(src1), [src2]"r"(src2) ++ : "memory", "q0", "q1", "q2"); ++ src1 += stride; ++ src2 += stride; ++ dest += stride; ++ } while (--height); ++} ++ ++static void MC_avg_2_8_neon (uint8_t * dest, const uint8_t * src1, ++ const uint8_t * src2, ++ const int stride, unsigned height) ++{ ++ do { ++ asm volatile ( ++ "vld1.u8 {d0}, [%[src1]]\n" ++ "vld1.u8 {d1}, [%[src2]]\n" ++ "vrhadd.u8 d0, d0, d1\n" ++ "vld1.u8 {d2}, [%[dest]]\n" ++ "vrhadd.u8 d0, d0, d2\n" ++ "vst1.u8 {d0}, [%[dest]]\n" ++ : ++ : [dest]"r"(dest), [src1]"r"(src1), [src2]"r"(src2) ++ : "memory", "q0", "d2"); ++ src1 += stride; ++ src2 += stride; ++ dest += stride; ++ } while (--height); ++} ++ ++static void MC_avg_o_16_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ MC_avg_1_16_neon (dest, dest, ref, stride, height); ++} ++ ++static void MC_avg_o_8_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ MC_avg_1_8_neon (dest, dest, ref, stride, height); ++} ++ ++static void MC_put_x_16_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ MC_avg_1_16_neon (dest, ref, ref + 1, stride, height); ++} ++ ++static void MC_put_x_8_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ MC_avg_1_8_neon (dest, ref, ref + 1, stride, height); ++} ++ ++static void MC_avg_x_16_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ MC_avg_2_16_neon (dest, ref, ref + 1, stride, height); ++} ++ ++static void MC_avg_x_8_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ MC_avg_2_8_neon (dest, ref, ref + 1, stride, height); ++} ++ ++static void MC_put_y_16_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ MC_avg_1_16_neon (dest, ref, ref + stride, stride, height); ++} ++static void MC_put_y_8_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ MC_avg_1_8_neon (dest, ref, ref + stride, stride, height); ++} ++ ++static void MC_avg_y_16_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ MC_avg_2_16_neon (dest, ref, ref + stride, stride, height); ++} ++ ++static void MC_avg_y_8_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ MC_avg_2_8_neon (dest, ref, ref + stride, stride, height); ++} ++ ++static void MC_put_xy_16_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ do { ++ asm volatile ( ++ "vld1.u8 {q0}, [%[ref]]\n" ++ "vld1.u8 {q1}, [%[refx]]\n" ++ "vrhadd.u8 q0, q0, q1\n" ++ "vld1.u8 {q2}, [%[refy]]\n" ++ "vld1.u8 {q3}, [%[refxy]]\n" ++ "vrhadd.u8 q2, q2, q3\n" ++ /* XXX: three cycles stall */ ++ "vrhadd.u8 q0, q0, q2\n" ++ /* XXX: three cycles stall */ ++ "vst1.u8 {q0}, [%[dest]]\n" ++ : ++ : [dest]"r"(dest), [ref]"r"(ref), [refx]"r"(ref + 1), ++ [refy]"r"(ref + stride), [refxy]"r"(ref + stride + 1) ++ : "memory", "q0", "q1", "q2", "q3"); ++ ref += stride; ++ dest += stride; ++ } while (--height); ++} ++ ++static void MC_put_xy_8_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ do { ++ asm volatile ( ++ "vld1.u8 {d0}, [%[ref]]\n" ++ "vld1.u8 {d1}, [%[refx]]\n" ++ "vrhadd.u8 d0, d0, d1\n" ++ "vld1.u8 {d2}, [%[refy]]\n" ++ "vld1.u8 {d3}, [%[refxy]]\n" ++ "vrhadd.u8 d2, d2, d3\n" ++ /* XXX: three cycles stall */ ++ "vrhadd.u8 d0, d0, d2\n" ++ /* XXX: three cycles stall */ ++ "vst1.u8 {d0}, [%[dest]]\n" ++ : ++ : [dest]"r"(dest), [ref]"r"(ref), [refx]"r"(ref + 1), ++ [refy]"r"(ref + stride), [refxy]"r"(ref + stride + 1) ++ : "memory", "q0", "q1"); ++ ref += stride; ++ dest += stride; ++ } while (--height); ++} ++ ++static void MC_avg_xy_16_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ do { ++ asm volatile ( ++ "vld1.u8 {q0}, [%[ref]]\n" ++ "vld1.u8 {q1}, [%[refx]]\n" ++ "vrhadd.u8 q0, q0, q1\n" ++ "vld1.u8 {q2}, [%[refy]]\n" ++ "vld1.u8 {q3}, [%[refxy]]\n" ++ "vrhadd.u8 q2, q2, q3\n" ++ "vld1.u8 {q4}, [%[dest]]\n" ++ /* XXX: one cycle stall */ ++ "vrhadd.u8 q0, q0, q2\n" ++ /* XXX: three cycles stall */ ++ "vrhadd.u8 q0, q4, q0\n" ++ "vst1.u8 {q0}, [%[dest]]\n" ++ : ++ : [dest]"r"(dest), [ref]"r"(ref), [refx]"r"(ref + 1), ++ [refy]"r"(ref + stride), [refxy]"r"(ref + stride + 1) ++ : "memory", "q0", "q1", "q2", "q3", "q4"); ++ ref += stride; ++ dest += stride; ++ } while (--height); ++} ++ ++static void MC_avg_xy_8_neon (uint8_t * dest, const uint8_t * ref, ++ const int stride, int height) ++{ ++ do { ++ asm volatile ( ++ "vld1.u8 {d0}, [%[ref]]\n" ++ "vld1.u8 {d1}, [%[refx]]\n" ++ "vrhadd.u8 d0, d0, d1\n" ++ "vld1.u8 {d2}, [%[refy]]\n" ++ "vld1.u8 {d3}, [%[refxy]]\n" ++ "vrhadd.u8 d2, d2, d3\n" ++ "vld1.u8 {d4}, [%[dest]]\n" ++ /* XXX: one cycle stall */ ++ "vrhadd.u8 d0, d0, d2\n" ++ /* XXX: three cycles stall */ ++ "vrhadd.u8 d0, d4, d0\n" ++ "vst1.u8 {d0}, [%[dest]]\n" ++ : ++ : [dest]"r"(dest), [ref]"r"(ref), [refx]"r"(ref + 1), ++ [refy]"r"(ref + stride), [refxy]"r"(ref + stride + 1) ++ : "memory", "q0", "q1", "d4"); ++ ref += stride; ++ dest += stride; ++ } while (--height); ++} ++ ++MPEG2_MC_EXTERN (neon) ++ ++#endif /* ARCH_ARM */ + +Modification de propriétés sur libmpeg2/motion_comp_neon.c +___________________________________________________________________ +Ajouté : svn:eol-style + + native + +Index: libmpeg2/mpeg2_internal.h +=================================================================== +--- libmpeg2/mpeg2_internal.h (révision 1193) ++++ libmpeg2/mpeg2_internal.h (copie de travail) +@@ -313,5 +313,6 @@ + extern mpeg2_mc_t mpeg2_mc_alpha; + extern mpeg2_mc_t mpeg2_mc_vis; + extern mpeg2_mc_t mpeg2_mc_arm; ++extern mpeg2_mc_t mpeg2_mc_neon; + + #endif /* LIBMPEG2_MPEG2_INTERNAL_H */ +Index: libmpeg2/motion_comp.c +=================================================================== +--- libmpeg2/motion_comp.c (révision 1193) ++++ libmpeg2/motion_comp.c (copie de travail) +@@ -58,6 +58,11 @@ + else + #endif + #ifdef ARCH_ARM ++#ifdef ARCH_ARM ++ if (accel & MPEG2_ACCEL_ARM) ++ mpeg2_mc = mpeg2_mc_neon; ++ else ++#endif + if (accel & MPEG2_ACCEL_ARM) { + mpeg2_mc = mpeg2_mc_arm; + } else +Index: libmpeg2/Makefile.am +=================================================================== +--- libmpeg2/Makefile.am (révision 1193) ++++ libmpeg2/Makefile.am (copie de travail) +@@ -14,7 +14,7 @@ + motion_comp_vis.c motion_comp_arm.c \ + cpu_accel.c cpu_state.c + if ARCH_ARM +-libmpeg2arch_la_SOURCES += motion_comp_arm_s.S ++libmpeg2arch_la_SOURCES += motion_comp_arm_s.S motion_comp_neon.c + endif + libmpeg2arch_la_CFLAGS = $(OPT_CFLAGS) $(ARCH_OPT_CFLAGS) $(LIBMPEG2_CFLAGS) + +Index: configure.ac +=================================================================== +--- configure.ac (révision 1193) ++++ configure.ac (copie de travail) +@@ -103,7 +103,14 @@ + AC_DEFINE([ARCH_ALPHA],,[alpha architecture]);; + arm*) + arm_conditional=: +- AC_DEFINE([ARCH_ARM],,[ARM architecture]);; ++ AC_DEFINE([ARCH_ARM],,[ARM architecture]) ++ AC_MSG_CHECKING([if inline ARM Advanced SIMD assembly is supported]) ++ AC_TRY_COMPILE([], ++ [asm ("vqmovun.s64 d0, q1":::"d0");], ++ [AC_DEFINE([ARCH_ARM_NEON],, [ARM Advanced SIMD assembly]) ++ AC_MSG_RESULT(yes)], ++ [AC_MSG_RESULT(no)]) ++ ;; + esac + elif test x"$CC" = x"tendracc"; then + dnl TenDRA portability checking compiler |