diff options
Diffstat (limited to 'abs/extra/libvpx/0001-Fix-for-issue-1114-compile-error.patch')
-rw-r--r-- | abs/extra/libvpx/0001-Fix-for-issue-1114-compile-error.patch | 317 |
1 files changed, 317 insertions, 0 deletions
diff --git a/abs/extra/libvpx/0001-Fix-for-issue-1114-compile-error.patch b/abs/extra/libvpx/0001-Fix-for-issue-1114-compile-error.patch new file mode 100644 index 0000000..0e01dd5 --- /dev/null +++ b/abs/extra/libvpx/0001-Fix-for-issue-1114-compile-error.patch @@ -0,0 +1,317 @@ +From 7361ef732b432e153496c30da66081d7e530c7f6 Mon Sep 17 00:00:00 2001 +From: Peter de Rivaz <peter.derivaz@argondesign.com> +Date: Mon, 14 Dec 2015 16:35:29 +0000 +Subject: [PATCH] Fix for issue 1114 compile error + +In 32-bit build with --enable-shared, there is a lot of +register pressure and register src_strideq is reused. +The code needs to use the stack based version of src_stride, +but this doesn't compile when used in an lea instruction. + +This patch also fixes a related segmentation fault caused by the +implementation using src_strideq even though it has been +reused. + +This patch also fixes the HBD subpel variance tests that fail +when compiled without disable-optimizations. +These failures were caused by local variables in the assembler +routines colliding with the caller's stack frame. + +Change-Id: Ice9d4dafdcbdc6038ad5ee7c1c09a8f06deca362 +--- + vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm | 18 +++---- + vpx_dsp/x86/highbd_variance_sse2.c | 64 ++++++++++++++---------- + 2 files changed, 44 insertions(+), 38 deletions(-) + +diff --git a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm +index 22d52a2..30ee81b 100644 +--- a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm ++++ b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm +@@ -79,20 +79,13 @@ SECTION .text + + %macro INC_SRC_BY_SRC_STRIDE 0 + %if ARCH_X86=1 && CONFIG_PIC=1 +- lea srcq, [srcq + src_stridemp*2] ++ add srcq, src_stridemp ++ add srcq, src_stridemp + %else + lea srcq, [srcq + src_strideq*2] + %endif + %endmacro + +-%macro INC_SRC_BY_SRC_2STRIDE 0 +-%if ARCH_X86=1 && CONFIG_PIC=1 +- lea srcq, [srcq + src_stridemp*4] +-%else +- lea srcq, [srcq + src_strideq*4] +-%endif +-%endmacro +- + %macro SUBPEL_VARIANCE 1-2 0 ; W + %define bilin_filter_m bilin_filter_m_sse2 + %define filter_idx_shift 5 +@@ -984,8 +977,9 @@ SECTION .text + .x_other_y_other_loop: + movu m2, [srcq] + movu m4, [srcq+2] +- movu m3, [srcq+src_strideq*2] +- movu m5, [srcq+src_strideq*2+2] ++ INC_SRC_BY_SRC_STRIDE ++ movu m3, [srcq] ++ movu m5, [srcq+2] + pmullw m2, filter_x_a + pmullw m4, filter_x_b + paddw m2, filter_rnd +@@ -1018,7 +1012,7 @@ SECTION .text + SUM_SSE m0, m2, m4, m3, m6, m7 + mova m0, m5 + +- INC_SRC_BY_SRC_2STRIDE ++ INC_SRC_BY_SRC_STRIDE + lea dstq, [dstq + dst_strideq * 4] + %if %2 == 1 ; avg + add secq, sec_str +diff --git a/vpx_dsp/x86/highbd_variance_sse2.c b/vpx_dsp/x86/highbd_variance_sse2.c +index b45331c..81ec5db 100644 +--- a/vpx_dsp/x86/highbd_variance_sse2.c ++++ b/vpx_dsp/x86/highbd_variance_sse2.c +@@ -243,13 +243,18 @@ unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride, + } + + #if CONFIG_USE_X86INC ++// The 2 unused parameters are place holders for PIC enabled build. ++// These definitions are for functions defined in ++// highbd_subpel_variance_impl_sse2.asm + #define DECL(w, opt) \ + int vpx_highbd_sub_pixel_variance##w##xh_##opt(const uint16_t *src, \ + ptrdiff_t src_stride, \ + int x_offset, int y_offset, \ + const uint16_t *dst, \ + ptrdiff_t dst_stride, \ +- int height, unsigned int *sse); ++ int height, \ ++ unsigned int *sse, \ ++ void *unused0, void *unused); + #define DECLS(opt1, opt2) \ + DECL(8, opt1); \ + DECL(16, opt1) +@@ -274,7 +279,7 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \ + int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \ + x_offset, y_offset, \ + dst, dst_stride, h, \ +- &sse); \ ++ &sse, NULL, NULL); \ + if (w > wf) { \ + unsigned int sse2; \ + int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \ +@@ -282,19 +287,20 @@ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt(const uint8_t *src8, \ + x_offset, y_offset, \ + dst + 16, \ + dst_stride, \ +- h, &sse2); \ ++ h, &sse2, \ ++ NULL, NULL); \ + se += se2; \ + sse += sse2; \ + if (w > wf * 2) { \ + se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ + x_offset, y_offset, \ + dst + 32, dst_stride, \ +- h, &sse2); \ ++ h, &sse2, NULL, NULL); \ + se += se2; \ + sse += sse2; \ + se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ + src + 48, src_stride, x_offset, y_offset, \ +- dst + 48, dst_stride, h, &sse2); \ ++ dst + 48, dst_stride, h, &sse2, NULL, NULL); \ + se += se2; \ + sse += sse2; \ + } \ +@@ -312,7 +318,7 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \ + int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src, src_stride, \ + x_offset, y_offset, \ + dst, dst_stride, \ +- h, &sse); \ ++ h, &sse, NULL, NULL); \ + if (w > wf) { \ + uint32_t sse2; \ + int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 16, \ +@@ -320,20 +326,21 @@ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \ + x_offset, y_offset, \ + dst + 16, \ + dst_stride, \ +- h, &sse2); \ ++ h, &sse2, \ ++ NULL, NULL); \ + se += se2; \ + sse += sse2; \ + if (w > wf * 2) { \ + se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 32, src_stride, \ + x_offset, y_offset, \ + dst + 32, dst_stride, \ +- h, &sse2); \ ++ h, &sse2, NULL, NULL); \ + se += se2; \ + sse += sse2; \ + se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt(src + 48, src_stride, \ + x_offset, y_offset, \ + dst + 48, dst_stride, \ +- h, &sse2); \ ++ h, &sse2, NULL, NULL); \ + se += se2; \ + sse += sse2; \ + } \ +@@ -359,27 +366,27 @@ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \ + int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ + src + (start_row * src_stride), src_stride, \ + x_offset, y_offset, dst + (start_row * dst_stride), \ +- dst_stride, height, &sse2); \ ++ dst_stride, height, &sse2, NULL, NULL); \ + se += se2; \ + long_sse += sse2; \ + if (w > wf) { \ + se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ + src + 16 + (start_row * src_stride), src_stride, \ + x_offset, y_offset, dst + 16 + (start_row * dst_stride), \ +- dst_stride, height, &sse2); \ ++ dst_stride, height, &sse2, NULL, NULL); \ + se += se2; \ + long_sse += sse2; \ + if (w > wf * 2) { \ + se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ + src + 32 + (start_row * src_stride), src_stride, \ + x_offset, y_offset, dst + 32 + (start_row * dst_stride), \ +- dst_stride, height, &sse2); \ ++ dst_stride, height, &sse2, NULL, NULL); \ + se += se2; \ + long_sse += sse2; \ + se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ + src + 48 + (start_row * src_stride), src_stride, \ + x_offset, y_offset, dst + 48 + (start_row * dst_stride), \ +- dst_stride, height, &sse2); \ ++ dst_stride, height, &sse2, NULL, NULL); \ + se += se2; \ + long_sse += sse2; \ + }\ +@@ -410,6 +417,7 @@ FNS(sse2, sse); + #undef FNS + #undef FN + ++// The 2 unused parameters are place holders for PIC enabled build. + #define DECL(w, opt) \ + int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \ + ptrdiff_t src_stride, \ +@@ -419,7 +427,8 @@ int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt(const uint16_t *src, \ + const uint16_t *sec, \ + ptrdiff_t sec_stride, \ + int height, \ +- unsigned int *sse); ++ unsigned int *sse, \ ++ void *unused0, void *unused); + #define DECLS(opt1) \ + DECL(16, opt1) \ + DECL(8, opt1) +@@ -439,23 +448,23 @@ uint32_t vpx_highbd_8_sub_pixel_avg_variance##w##x##h##_##opt( \ + uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \ + int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ + src, src_stride, x_offset, \ +- y_offset, dst, dst_stride, sec, w, h, &sse); \ ++ y_offset, dst, dst_stride, sec, w, h, &sse, NULL, NULL); \ + if (w > wf) { \ + uint32_t sse2; \ + int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ + src + 16, src_stride, x_offset, y_offset, \ +- dst + 16, dst_stride, sec + 16, w, h, &sse2); \ ++ dst + 16, dst_stride, sec + 16, w, h, &sse2, NULL, NULL); \ + se += se2; \ + sse += sse2; \ + if (w > wf * 2) { \ + se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ + src + 32, src_stride, x_offset, y_offset, \ +- dst + 32, dst_stride, sec + 32, w, h, &sse2); \ ++ dst + 32, dst_stride, sec + 32, w, h, &sse2, NULL, NULL); \ + se += se2; \ + sse += sse2; \ + se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ + src + 48, src_stride, x_offset, y_offset, \ +- dst + 48, dst_stride, sec + 48, w, h, &sse2); \ ++ dst + 48, dst_stride, sec + 48, w, h, &sse2, NULL, NULL); \ + se += se2; \ + sse += sse2; \ + } \ +@@ -475,14 +484,15 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \ + int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ + src, src_stride, x_offset, \ + y_offset, dst, dst_stride, \ +- sec, w, h, &sse); \ ++ sec, w, h, &sse, NULL, NULL); \ + if (w > wf) { \ + uint32_t sse2; \ + int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ + src + 16, src_stride, \ + x_offset, y_offset, \ + dst + 16, dst_stride, \ +- sec + 16, w, h, &sse2); \ ++ sec + 16, w, h, &sse2, \ ++ NULL, NULL); \ + se += se2; \ + sse += sse2; \ + if (w > wf * 2) { \ +@@ -490,14 +500,16 @@ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \ + src + 32, src_stride, \ + x_offset, y_offset, \ + dst + 32, dst_stride, \ +- sec + 32, w, h, &sse2); \ ++ sec + 32, w, h, &sse2, \ ++ NULL, NULL); \ + se += se2; \ + sse += sse2; \ + se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ + src + 48, src_stride, \ + x_offset, y_offset, \ + dst + 48, dst_stride, \ +- sec + 48, w, h, &sse2); \ ++ sec + 48, w, h, &sse2, \ ++ NULL, NULL); \ + se += se2; \ + sse += sse2; \ + } \ +@@ -525,7 +537,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \ + int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ + src + (start_row * src_stride), src_stride, x_offset, \ + y_offset, dst + (start_row * dst_stride), dst_stride, \ +- sec + (start_row * w), w, height, &sse2); \ ++ sec + (start_row * w), w, height, &sse2, NULL, NULL); \ + se += se2; \ + long_sse += sse2; \ + if (w > wf) { \ +@@ -533,7 +545,7 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \ + src + 16 + (start_row * src_stride), src_stride, \ + x_offset, y_offset, \ + dst + 16 + (start_row * dst_stride), dst_stride, \ +- sec + 16 + (start_row * w), w, height, &sse2); \ ++ sec + 16 + (start_row * w), w, height, &sse2, NULL, NULL); \ + se += se2; \ + long_sse += sse2; \ + if (w > wf * 2) { \ +@@ -541,14 +553,14 @@ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \ + src + 32 + (start_row * src_stride), src_stride, \ + x_offset, y_offset, \ + dst + 32 + (start_row * dst_stride), dst_stride, \ +- sec + 32 + (start_row * w), w, height, &sse2); \ ++ sec + 32 + (start_row * w), w, height, &sse2, NULL, NULL); \ + se += se2; \ + long_sse += sse2; \ + se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ + src + 48 + (start_row * src_stride), src_stride, \ + x_offset, y_offset, \ + dst + 48 + (start_row * dst_stride), dst_stride, \ +- sec + 48 + (start_row * w), w, height, &sse2); \ ++ sec + 48 + (start_row * w), w, height, &sse2, NULL, NULL); \ + se += se2; \ + long_sse += sse2; \ + } \ +-- +2.7.0 + |