summary refs log tree commit diff stats
path: root/abs/extra/libvpx/0001-Fix-for-issue-1114-compile-error.patch
diff options
context:
space:
mode:
Diffstat (limited to 'abs/extra/libvpx/0001-Fix-for-issue-1114-compile-error.patch')
-rw-r--r-- abs/extra/libvpx/0001-Fix-for-issue-1114-compile-error.patch 317
1 file changed, 317 insertions, 0 deletions
diff --git a/abs/extra/libvpx/0001-Fix-for-issue-1114-compile-error.patch b/abs/extra/libvpx/0001-Fix-for-issue-1114-compile-error.patch
new file mode 100644
index 0000000..0e01dd5
--- /dev/null
+++ b/abs/extra/libvpx/0001-Fix-for-issue-1114-compile-error.patch
@@ -0,0 +1,317 @@
+From 7361ef732b432e153496c30da66081d7e530c7f6 Mon Sep 17 00:00:00 2001
+From: Peter de Rivaz <peter.derivaz@argondesign.com>
+Date: Mon, 14 Dec 2015 16:35:29 +0000
+Subject: [PATCH] Fix for issue 1114 compile error
+
+In a 32-bit build with --enable-shared, there is a lot of
+register pressure and the register src_strideq is reused.
+The code needs to use the stack-based version of src_stride,
+but this doesn't compile when used in an lea instruction.
+
+This patch also fixes a related segmentation fault caused by the
+implementation using src_strideq even though it has been
+reused.
+
+This patch also fixes the HBD subpel variance tests that fail
+when compiled without --disable-optimizations.
+These failures were caused by local variables in the assembler
+routines colliding with the caller's stack frame.
+
+Change-Id: Ice9d4dafdcbdc6038ad5ee7c1c09a8f06deca362
+---
+ vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm | 18 +++----
+ vpx_dsp/x86/highbd_variance_sse2.c | 64 ++++++++++++++----------
+ 2 files changed, 44 insertions(+), 38 deletions(-)
+
+diff --git a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
+index 22d52a2..30ee81b 100644
+--- a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
++++ b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
+@@ -79,20 +79,13 @@ SECTION .text
+
+ %macro INC_SRC_BY_SRC_STRIDE 0
+ %if ARCH_X86=1 && CONFIG_PIC=1
+- lea srcq, [srcq + src_stridemp*2]
++ add srcq, src_stridemp
++ add srcq, src_stridemp
+ %else
+ lea srcq, [srcq + src_strideq*2]
+ %endif
+ %endmacro
+
+-%macro INC_SRC_BY_SRC_2STRIDE 0
+-%if ARCH_X86=1 && CONFIG_PIC=1
+- lea srcq, [srcq + src_stridemp*4]
+-%else
+- lea srcq, [srcq + src_strideq*4]
+-%endif
+-%endmacro
+-
+ %macro SUBPEL_VARIANCE 1-2 0 ; W
+ %define bilin_filter_m bilin_filter_m_sse2
+ %define filter_idx_shift 5
+@@ -984,8 +977,9 @@ SECTION .text
+ .x_other_y_other_loop:
+ movu m2, [srcq]
+ movu m4, [srcq+2]
+- movu m3, [srcq+src_strideq*2]
+- movu m5, [srcq+src_strideq*2+2]
++ INC_SRC_BY_SRC_STRIDE
++ movu m3, [srcq]
++ movu m5, [srcq+2]
+ pmullw m2, filter_x_a
+ pmullw m4, filter_x_b
+ paddw m2, filter_rnd
+@@ -1018,7 +1012,7 @@ SECTION .text
+ SUM_SSE m0, m2, m4, m3, m6, m7
+ mova m0, m5
+
+- INC_SRC_BY_SRC_2STRIDE
++ INC_SRC_BY_SRC_STRIDE
+ lea dstq, [dstq + dst_strideq * 4]
+ %if %2 == 1 ; avg
+ add secq, sec_str
+diff --git a/vpx_dsp/x86/highbd_variance_sse2.c b/vpx_dsp/x86/highbd_variance_sse2.c
+index b45331c..81ec5db 100644
+--- a/vpx_dsp/x86/highbd_variance_sse2.c
++++ b/vpx_dsp/x86/highbd_variance_sse2.c
+@@ -243,13 +243,18 @@ unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride,
+ }
+
+ #if CONFIG_USE_X86INC
++// The 2 unused parameters are placeholders for PIC-enabled builds.
++// These definitions are for functions defined in
++// highbd_subpel_variance_impl_sse2.asm
+ #define DECL(w, opt) \
+ int vpx_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \
+ ptrdiff_t src_stride, \
+ int x_offset, int y_offset, \
+ const uint16_t *dst, \
+ ptrdiff_t dst_stride, \
+- int height, unsigned int *sse);
++ int height, \
++ unsigned int *sse, \
++ void *unused0, void *unused);
+ #define DECLS(opt1, opt2) \
+ DECL(8, opt1); \
+ DECL(16, opt1)
+@@ -274,7 +279,7 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
+ int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, h, \
+- &sse); \
++ &sse, NULL, NULL); \
+ if (w > wf) { \
+ unsigned int sse2; \
+ int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
+@@ -282,19 +287,20 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \
+ x_offset, y_offset, \
+ dst + 16, \
+ dst_stride, \
+- h, &sse2); \
++ h, &sse2, \
++ NULL, NULL); \
+ se += se2; \
+ sse += sse2; \
+ if (w > wf * 2) { \
+ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
+ x_offset, y_offset, \
+ dst + 32, dst_stride, \
+- h, &sse2); \
++ h, &sse2, NULL, NULL); \
+ se += se2; \
+ sse += sse2; \
+ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
+ src + 48, src_stride, x_offset, y_offset, \
+- dst + 48, dst_stride, h, &sse2); \
++ dst + 48, dst_stride, h, &sse2, NULL, NULL); \
+ se += se2; \
+ sse += sse2; \
+ } \
+@@ -312,7 +318,7 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
+ int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \
+ x_offset, y_offset, \
+ dst, dst_stride, \
+- h, &sse); \
++ h, &sse, NULL, NULL); \
+ if (w > wf) { \
+ uint32_t sse2; \
+ int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \
+@@ -320,20 +326,21 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \
+ x_offset, y_offset, \
+ dst + 16, \
+ dst_stride, \
+- h, &sse2); \
++ h, &sse2, \
++ NULL, NULL); \
+ se += se2; \
+ sse += sse2; \
+ if (w > wf * 2) { \
+ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \
+ x_offset, y_offset, \
+ dst + 32, dst_stride, \
+- h, &sse2); \
++ h, &sse2, NULL, NULL); \
+ se += se2; \
+ sse += sse2; \
+ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \
+ x_offset, y_offset, \
+ dst + 48, dst_stride, \
+- h, &sse2); \
++ h, &sse2, NULL, NULL); \
+ se += se2; \
+ sse += sse2; \
+ } \
+@@ -359,27 +366,27 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \
+ int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
+ src + (start_row * src_stride), src_stride, \
+ x_offset, y_offset, dst + (start_row * dst_stride), \
+- dst_stride, height, &sse2); \
++ dst_stride, height, &sse2, NULL, NULL); \
+ se += se2; \
+ long_sse += sse2; \
+ if (w > wf) { \
+ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
+ src + 16 + (start_row * src_stride), src_stride, \
+ x_offset, y_offset, dst + 16 + (start_row * dst_stride), \
+- dst_stride, height, &sse2); \
++ dst_stride, height, &sse2, NULL, NULL); \
+ se += se2; \
+ long_sse += sse2; \
+ if (w > wf * 2) { \
+ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
+ src + 32 + (start_row * src_stride), src_stride, \
+ x_offset, y_offset, dst + 32 + (start_row * dst_stride), \
+- dst_stride, height, &sse2); \
++ dst_stride, height, &sse2, NULL, NULL); \
+ se += se2; \
+ long_sse += sse2; \
+ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \
+ src + 48 + (start_row * src_stride), src_stride, \
+ x_offset, y_offset, dst + 48 + (start_row * dst_stride), \
+- dst_stride, height, &sse2); \
++ dst_stride, height, &sse2, NULL, NULL); \
+ se += se2; \
+ long_sse += sse2; \
+ }\
+@@ -410,6 +417,7 @@ FNS(sse2, sse);
+ #undef FNS
+ #undef FN
+
++// The 2 unused parameters are placeholders for PIC-enabled builds.
+ #define DECL(w, opt) \
+ int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
+ ptrdiff_t src_stride, \
+@@ -419,7 +427,8 @@ int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \
+ const uint16_t *sec, \
+ ptrdiff_t sec_stride, \
+ int height, \
+- unsigned int *sse);
++ unsigned int *sse, \
++ void *unused0, void *unused);
+ #define DECLS(opt1) \
+ DECL(16, opt1) \
+ DECL(8, opt1)
+@@ -439,23 +448,23 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance##w##x##h##_##opt( \
+ uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \
+ int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
+ src, src_stride, x_offset, \
+- y_offset, dst, dst_stride, sec, w, h, &sse); \
++ y_offset, dst, dst_stride, sec, w, h, &sse, NULL, NULL); \
+ if (w > wf) { \
+ uint32_t sse2; \
+ int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
+ src + 16, src_stride, x_offset, y_offset, \
+- dst + 16, dst_stride, sec + 16, w, h, &sse2); \
++ dst + 16, dst_stride, sec + 16, w, h, &sse2, NULL, NULL); \
+ se += se2; \
+ sse += sse2; \
+ if (w > wf * 2) { \
+ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
+ src + 32, src_stride, x_offset, y_offset, \
+- dst + 32, dst_stride, sec + 32, w, h, &sse2); \
++ dst + 32, dst_stride, sec + 32, w, h, &sse2, NULL, NULL); \
+ se += se2; \
+ sse += sse2; \
+ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
+ src + 48, src_stride, x_offset, y_offset, \
+- dst + 48, dst_stride, sec + 48, w, h, &sse2); \
++ dst + 48, dst_stride, sec + 48, w, h, &sse2, NULL, NULL); \
+ se += se2; \
+ sse += sse2; \
+ } \
+@@ -475,14 +484,15 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
+ int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
+ src, src_stride, x_offset, \
+ y_offset, dst, dst_stride, \
+- sec, w, h, &sse); \
++ sec, w, h, &sse, NULL, NULL); \
+ if (w > wf) { \
+ uint32_t sse2; \
+ int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
+ src + 16, src_stride, \
+ x_offset, y_offset, \
+ dst + 16, dst_stride, \
+- sec + 16, w, h, &sse2); \
++ sec + 16, w, h, &sse2, \
++ NULL, NULL); \
+ se += se2; \
+ sse += sse2; \
+ if (w > wf * 2) { \
+@@ -490,14 +500,16 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \
+ src + 32, src_stride, \
+ x_offset, y_offset, \
+ dst + 32, dst_stride, \
+- sec + 32, w, h, &sse2); \
++ sec + 32, w, h, &sse2, \
++ NULL, NULL); \
+ se += se2; \
+ sse += sse2; \
+ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
+ src + 48, src_stride, \
+ x_offset, y_offset, \
+ dst + 48, dst_stride, \
+- sec + 48, w, h, &sse2); \
++ sec + 48, w, h, &sse2, \
++ NULL, NULL); \
+ se += se2; \
+ sse += sse2; \
+ } \
+@@ -525,7 +537,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
+ int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
+ src + (start_row * src_stride), src_stride, x_offset, \
+ y_offset, dst + (start_row * dst_stride), dst_stride, \
+- sec + (start_row * w), w, height, &sse2); \
++ sec + (start_row * w), w, height, &sse2, NULL, NULL); \
+ se += se2; \
+ long_sse += sse2; \
+ if (w > wf) { \
+@@ -533,7 +545,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
+ src + 16 + (start_row * src_stride), src_stride, \
+ x_offset, y_offset, \
+ dst + 16 + (start_row * dst_stride), dst_stride, \
+- sec + 16 + (start_row * w), w, height, &sse2); \
++ sec + 16 + (start_row * w), w, height, &sse2, NULL, NULL); \
+ se += se2; \
+ long_sse += sse2; \
+ if (w > wf * 2) { \
+@@ -541,14 +553,14 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \
+ src + 32 + (start_row * src_stride), src_stride, \
+ x_offset, y_offset, \
+ dst + 32 + (start_row * dst_stride), dst_stride, \
+- sec + 32 + (start_row * w), w, height, &sse2); \
++ sec + 32 + (start_row * w), w, height, &sse2, NULL, NULL); \
+ se += se2; \
+ long_sse += sse2; \
+ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \
+ src + 48 + (start_row * src_stride), src_stride, \
+ x_offset, y_offset, \
+ dst + 48 + (start_row * dst_stride), dst_stride, \
+- sec + 48 + (start_row * w), w, height, &sse2); \
++ sec + 48 + (start_row * w), w, height, &sse2, NULL, NULL); \
+ se += se2; \
+ long_sse += sse2; \
+ } \
+--
+2.7.0
+