diff options
Diffstat (limited to 'gpr/source/lib/vc5_encoder/forward.c')
-rwxr-xr-x | gpr/source/lib/vc5_encoder/forward.c | 851 |
1 files changed, 851 insertions, 0 deletions
diff --git a/gpr/source/lib/vc5_encoder/forward.c b/gpr/source/lib/vc5_encoder/forward.c new file mode 100755 index 0000000..ab54dd8 --- /dev/null +++ b/gpr/source/lib/vc5_encoder/forward.c @@ -0,0 +1,851 @@ +/*! @file forward.c + * + * @brief Implementation of the forward wavelet transform functions. + * + * (C) Copyright 2018 GoPro Inc (http://gopro.com/). + * + * Licensed under either: + * - Apache License, Version 2.0, http://www.apache.org/licenses/LICENSE-2.0 + * - MIT license, http://opensource.org/licenses/MIT + * at your option. + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "headers.h" + +#if ENABLED(NEON) +#include <arm_neon.h> +#endif + +//! Rounding added to the highpass sum before division +static const int32_t rounding = 4; + +STATIC_INLINE PIXEL QuantizeValue(int16_t value, int32_t midpoint, int32_t multiplier ) +{ + int less_than_zero = 0; + int negate_less_than_zero = 0; + + int16_t x = abs(value) + midpoint; + + if( value < 0 ) + { + less_than_zero = -1; + negate_less_than_zero = 1; + } + + x = (int32_t)(x * multiplier) >> 16; + + x = x ^ less_than_zero; + x = x + negate_less_than_zero; + + return ClampPixel(x); +} + +static void FilterVerticalTopBottom_Core_8x_C_(PIXEL *coefficients[], int column, int16_t* highpass, int16_t* lowpass, bool top ) +{ + const int filter_coeffs_top[] = { 5, -11, 4, 4, -1, -1 }; + const int filter_coeffs_bottom[] = { 1, 1, -4, -4, 11, -5 }; + + int low_band_index = top ? 0 : 4; + const int* filter_coeffs = top ? filter_coeffs_top : filter_coeffs_bottom; + + int i, f; + + for (i = 0; i < 8; i++) + { + lowpass[i] = coefficients[low_band_index + 0][column + i] + coefficients[low_band_index + 1][column + i]; + } + + for (i = 0; i < 8; i++) + { + int32_t sum = 0; + + for (f = 0; f < 6; f++) + { + sum += filter_coeffs[f] * coefficients[f][column + i]; + } + + sum += rounding; + sum = DivideByShift(sum, 3); + highpass[i] = sum; + } +} + +#if ENABLED(NEON) + +static const uint16x8_t mask = {0x0000, 0xFFFF,0x0000,0xFFFF,0x0000,0xFFFF,0x0000, 0xFFFF}; + +#define HorizontalFilter_Prescale2_4x HorizontalFilter_Prescale2_4x_NEON_ +void HorizontalFilter_Prescale2_4x_NEON_(PIXEL *input, PIXEL* lowpass, PIXEL* highpass ) +{ + const int prescale_rounding = 3; + const int prescale = 2; + + int32x4_t __pairwise_sum_0_7, __highpass; + int32x4_t __diff; + int16x8_t __input_2_9; + + { + const int16x8_t __prescale_rounding = vdupq_n_s16 (prescale_rounding); + const int16x8_t __shift = vdupq_n_s16 (-prescale); + + int16x8_t __input_0_7 = vld1q_s16( input ); + __input_2_9 = vld1q_s16( input + 2 ); + int16x8_t __input_8_15 = vld1q_s16( input + 8 ); + + __input_0_7 = vaddq_s16( __input_0_7, __prescale_rounding ); + __input_0_7 = vshlq_s16( __input_0_7, __shift ); + + __input_8_15 = vaddq_s16( __input_8_15, __prescale_rounding ); + __input_8_15 = vshlq_s16( __input_8_15, __shift ); + + __pairwise_sum_0_7 = vpaddlq_s16(__input_0_7); + int32x4_t __pairwise_sum_8_15 = vpaddlq_s16(__input_8_15); + + __input_0_7 = vbslq_s16(mask, vnegq_s16(__input_0_7), __input_0_7); + __input_8_15 = vbslq_s16(mask, vnegq_s16(__input_8_15), __input_8_15); + __diff = vextq_s32(vpaddlq_s16( __input_0_7 ), vpaddlq_s16( __input_8_15 ), 1); + + __highpass = vcombine_s32( vget_high_s32(__pairwise_sum_0_7), vget_low_s32(__pairwise_sum_8_15) ); + } + + // High pass band + { + const int32x4_t __rounding = vdupq_n_s32(rounding); + + __highpass = vsubq_s32( __highpass, __pairwise_sum_0_7 ); + __highpass = vaddq_s32( __highpass, __rounding ); + __highpass = vshrq_n_s32( __highpass, 3 ); + __highpass = vqaddq_s32( __highpass, __diff ); // Dont need to clamp because we are using saturating instruction + + vst1_s16(highpass, vmovn_s32(__highpass) ); + } + + // Low pass band + { + const int32x4_t __prescale_rounding = vdupq_n_s32(prescale_rounding); + const int32x4_t __shift = vdupq_n_s32(-prescale); + + int32x4_t __pairwise_sum_2_9 = vpaddlq_s16(__input_2_9); + + __pairwise_sum_2_9 = vaddq_s32(__pairwise_sum_2_9, __prescale_rounding); + __pairwise_sum_2_9 = vshlq_s32(__pairwise_sum_2_9, __shift); + + vst1_s16(lowpass, vmovn_s32(__pairwise_sum_2_9) ); + } +} + +#define HorizontalFilter_Prescale0_4x HorizontalFilter_Prescale0_4x_NEON_ +void HorizontalFilter_Prescale0_4x_NEON_(PIXEL *input, PIXEL* lowpass, PIXEL* highpass ) +{ + int32x4_t __pairwise_sum_0_7, __highpass; + int32x4_t __diff; + int16x8_t __input_2_9; + + { + int16x8_t __input_0_7 = vld1q_s16( input ); + __input_2_9 = vld1q_s16( input + 2 ); + int16x8_t __input_8_15 = vld1q_s16( input + 8 ); + + __pairwise_sum_0_7 = vpaddlq_s16(__input_0_7); + int32x4_t __pairwise_sum_8_15 = vpaddlq_s16(__input_8_15); + + __input_0_7 = vbslq_s16(mask, vnegq_s16(__input_0_7), __input_0_7); + __input_8_15 = vbslq_s16(mask, vnegq_s16(__input_8_15), __input_8_15); + __diff = vextq_s32(vpaddlq_s16( __input_0_7 ), vpaddlq_s16( __input_8_15 ), 1); + + __highpass = vcombine_s32( vget_high_s32(__pairwise_sum_0_7), vget_low_s32(__pairwise_sum_8_15) ); + } + + // High pass band + { + const int32x4_t __rounding = vdupq_n_s32(rounding); + + __highpass = vsubq_s32( __highpass, __pairwise_sum_0_7 ); + __highpass = vaddq_s32( __highpass, __rounding ); + __highpass = vshrq_n_s32( __highpass, 3 ); + __highpass = vqaddq_s32( __highpass, __diff ); // Dont need to clamp because we are using saturating instruction + + vst1_s16(highpass, vmovn_s32(__highpass) ); + } + + // Low pass band + { + int32x4_t __pairwise_sum_2_9 = vpaddlq_s16(__input_2_9); + + vst1_s16(lowpass, vmovn_s32(__pairwise_sum_2_9) ); + } +} + +void QuantizeBand_8x_NEON_(int16_t* wavelet_band, int16_t midpoint, int32_t multiplier, PIXEL *output ) +{ + int16x8_t __wavelet_band = vld1q_s16( wavelet_band ); + + int16x8_t __wavelet_band_abs = vaddq_s16( vabsq_s16(__wavelet_band), vdupq_n_s16( midpoint ) ); + + int32x4_t __multipliers = vdupq_n_s32( multiplier ); + + int32x4_t __value_high = vmovl_s16( vget_high_s16(__wavelet_band_abs) ); + __value_high = vmulq_s32( __value_high, __multipliers ); + + int32x4_t __value_low = vmovl_s16( vget_low_s16(__wavelet_band_abs) ); + __value_low = vmulq_s32( __value_low, __multipliers ); + + int16x8_t __multiplied = vcombine_s16( vshrn_n_s32( __value_low, 16 ), vshrn_n_s32( __value_high, 16 ) ); + + uint16x8_t mask = vcltq_s16(__wavelet_band, vdupq_n_s16(0) ); + int16x8_t __neg_output = vnegq_s16(__multiplied); + + int16x8_t __result = vbslq_s16( mask, __neg_output, __multiplied ); + + vst1q_s16(output, __result); +} + +void FilterVerticalMiddle_Core_8x_NEON_(PIXEL *coefficients[], int column, int16_t* highpass, int16_t* lowpass ) +{ + int16x8_t __highpass, __highpass_50, __highpass_14; + + { + int16x8_t __row_0 = vld1q_s16( &coefficients[0][column] ); + int16x8_t __row_5 = vld1q_s16( &coefficients[5][column] ); + + __highpass_50 = vsubq_s16( __row_5, __row_0 ); + } + + { + int16x8_t __row_1 = vld1q_s16( &coefficients[1][column] ); + int16x8_t __row_4 = vld1q_s16( &coefficients[4][column] ); + + __highpass_14 = vsubq_s16( __row_4, __row_1 ); + } + + { + int16x8_t __rounding = vdupq_n_s16 (rounding); + + __highpass = vaddq_s16( __highpass_50, __highpass_14 ); + __highpass = vaddq_s16( __highpass, __rounding ); + __highpass = vshrq_n_s16(__highpass, 3); + } + + { + int16x8_t __row_2 = vld1q_s16( &coefficients[2][column] ); + int16x8_t __row_3 = vld1q_s16( &coefficients[3][column] ); + + int16x8_t __diff_23 = vsubq_s16( __row_2, __row_3 ); + int16x8_t __sum_23 = vaddq_s16( __row_2, __row_3 ); + + __highpass = vaddq_s16( __highpass, __diff_23 ); + + vst1q_s16(lowpass, __sum_23); + } + + vst1q_s16(highpass, __highpass); + +} + +#define FilterVerticalMiddle_8x FilterVerticalMiddle_8x_NEON_ +void FilterVerticalMiddle_8x_NEON_(PIXEL *lowpass[], PIXEL *highpass[], int column, int32_t* midpoints, int32_t* multipliers, PIXEL *result[]) +{ + int16_t LOW[8]; + int16_t HIGH[8]; + + FilterVerticalMiddle_Core_8x_NEON_( highpass, column, HIGH, LOW); + QuantizeBand_8x_NEON_( LOW, midpoints[LH_BAND], multipliers[LH_BAND], result[LH_BAND] + column ); + QuantizeBand_8x_NEON_( HIGH, midpoints[HH_BAND], multipliers[HH_BAND], result[HH_BAND] + column ); + + FilterVerticalMiddle_Core_8x_NEON_( lowpass, column, HIGH, LOW); + QuantizeBand_8x_NEON_( LOW, midpoints[LL_BAND], multipliers[LL_BAND], result[LL_BAND] + column ); + QuantizeBand_8x_NEON_( HIGH, midpoints[HL_BAND], multipliers[HL_BAND], result[HL_BAND] + column ); +} + +#define FilterVerticalTopBottom_8x FilterVerticalTopBottom_8x_NEON_ +void FilterVerticalTopBottom_8x_NEON_(PIXEL *lowpass[], PIXEL *highpass[], int column, int32_t* midpoints, int32_t* multipliers, PIXEL *result[], bool top ) +{ + int16_t LOW[8]; + int16_t HIGH[8]; + + FilterVerticalTopBottom_Core_8x_C_( highpass, column, HIGH, LOW, top ); + QuantizeBand_8x_NEON_( LOW, midpoints[LH_BAND], multipliers[LH_BAND], result[LH_BAND] + column ); + QuantizeBand_8x_NEON_( HIGH, midpoints[HH_BAND], multipliers[HH_BAND], result[HH_BAND] + column ); + + FilterVerticalTopBottom_Core_8x_C_( lowpass, column, HIGH, LOW, top ); + QuantizeBand_8x_NEON_( LOW, midpoints[LL_BAND], multipliers[LL_BAND], result[LL_BAND] + column ); + QuantizeBand_8x_NEON_( HIGH, midpoints[HL_BAND], multipliers[HL_BAND], result[HL_BAND] + column ); +} + +#else + +#define HorizontalFilter_Prescale2_4x HorizontalFilter_Prescale2_4x_C_ +void HorizontalFilter_Prescale2_4x_C_(PIXEL *input, PIXEL* lowpass, PIXEL* highpass ) +{ + int i; + PIXEL input_c[16]; + for ( i = 0; i < 12; i++) + { + input_c[i] = (input[i] + 3) >> 2; + } + + int32_t diff_23 = input_c[2] - input_c[3]; + int32_t diff_45 = input_c[4] - input_c[5]; + int32_t diff_67 = input_c[6] - input_c[7]; + int32_t diff_89 = input_c[8] - input_c[9]; + + int32_t sum_01 = input_c[0] + input_c[1]; + int32_t sum_23 = input_c[2] + input_c[3]; + int32_t sum_45 = input_c[4] + input_c[5]; + int32_t sum_67 = input_c[6] + input_c[7]; + int32_t sum_89 = input_c[8] + input_c[9]; + int32_t sum_1011 = input_c[10] + input_c[11]; + + { + int32_t sum = sum_45 - sum_01; + + sum += rounding; + sum = DivideByShift(sum, 3); + sum += diff_23; + + highpass[0] = ClampPixel(sum); + } + + { + int32_t sum = sum_67 - sum_23; + + sum += rounding; + sum = DivideByShift(sum, 3); + sum += diff_45; + + highpass[1] = ClampPixel(sum); + } + + { + int32_t sum = sum_89 - sum_45; + + sum += rounding; + sum = DivideByShift(sum, 3); + sum += diff_67; + + highpass[2] = ClampPixel(sum); + } + + { + int32_t sum = sum_1011 - sum_67; + + sum += rounding; + sum = DivideByShift(sum, 3); + sum += diff_89; + + highpass[3] = ClampPixel(sum); + } + + lowpass[0] = (input[2] + input[3] + 3) >> 2; + lowpass[1] = (input[4] + input[5] + 3) >> 2; + lowpass[2] = (input[6] + input[7] + 3) >> 2; + lowpass[3] = (input[8] + input[9] + 3) >> 2; +} + +#define HorizontalFilter_Prescale0_4x HorizontalFilter_Prescale0_4x_C_ +void HorizontalFilter_Prescale0_4x_C_(PIXEL *input, PIXEL* lowpass, PIXEL* highpass ) +{ + PIXEL input_c[16]; + + memcpy(input_c, input, sizeof(PIXEL) * 12); + + int32_t diff_23 = input_c[2] - input_c[3]; + int32_t diff_45 = input_c[4] - input_c[5]; + int32_t diff_67 = input_c[6] - input_c[7]; + int32_t diff_89 = input_c[8] - input_c[9]; + + int32_t sum_01 = input_c[0] + input_c[1]; + int32_t sum_23 = input_c[2] + input_c[3]; + int32_t sum_45 = input_c[4] + input_c[5]; + int32_t sum_67 = input_c[6] + input_c[7]; + int32_t sum_89 = input_c[8] + input_c[9]; + int32_t sum_1011 = input_c[10] + input_c[11]; + + { + int32_t sum = sum_45 - sum_01; + + sum += rounding; + sum = DivideByShift(sum, 3); + sum += diff_23; + + highpass[0] = ClampPixel(sum); + } + + { + int32_t sum = sum_67 - sum_23; + + sum += rounding; + sum = DivideByShift(sum, 3); + sum += diff_45; + + highpass[1] = ClampPixel(sum); + } + + { + int32_t sum = sum_89 - sum_45; + + sum += rounding; + sum = DivideByShift(sum, 3); + sum += diff_67; + + highpass[2] = ClampPixel(sum); + } + + { + int32_t sum = sum_1011 - sum_67; + + sum += rounding; + sum = DivideByShift(sum, 3); + sum += diff_89; + + highpass[3] = ClampPixel(sum); + } + + lowpass[0] = input[2] + input[3]; + lowpass[1] = input[4] + input[5]; + lowpass[2] = input[6] + input[7]; + lowpass[3] = input[8] + input[9]; +} + +void FilterVerticalMiddle_Core_8x_C_(PIXEL *coefficients[], int column, int16_t* highpass, int16_t* lowpass ) +{ + PIXEL row_0[8]; + PIXEL row_1[8]; + PIXEL row_2[8]; + PIXEL row_3[8]; + PIXEL row_4[8]; + PIXEL row_5[8]; + + int i; + + memcpy( row_0, &coefficients[0][column], sizeof(PIXEL) * 8 ); + memcpy( row_1, &coefficients[1][column], sizeof(PIXEL) * 8 ); + memcpy( row_2, &coefficients[2][column], sizeof(PIXEL) * 8 ); + memcpy( row_3, &coefficients[3][column], sizeof(PIXEL) * 8 ); + memcpy( row_4, &coefficients[4][column], sizeof(PIXEL) * 8 ); + memcpy( row_5, &coefficients[5][column], sizeof(PIXEL) * 8 ); + + for (i = 0; i < 8; i++) + highpass[i] = (-1 * row_0[i] + row_5[i]); + + for (i = 0; i < 8; i++) + highpass[i] += (-1 * row_1[i] + row_4[i]); + + for (i = 0; i < 8; i++) + highpass[i] += rounding; + + for (i = 0; i < 8; i++) + highpass[i] = DivideByShift(highpass[i], 3); + + for (i = 0; i < 8; i++) + { + highpass[i] += (row_2[i] - row_3[i]); + lowpass[i] = (row_2[i] + row_3[i]); + } +} + + +#define FilterVerticalMiddle_8x FilterVerticalMiddle_8x_C_ +void FilterVerticalMiddle_8x_C_(PIXEL *lowpass[], PIXEL *highpass[], int column, int32_t* midpoints, int32_t* multipliers, PIXEL *result[]) +{ + int i; + + int16_t LL[8]; + int16_t HL[8]; + int16_t LH[8]; + int16_t HH[8]; + + FilterVerticalMiddle_Core_8x_C_( highpass, column, HH, LH); + FilterVerticalMiddle_Core_8x_C_( lowpass, column, HL, LL); + + for (i = 0; i < 8; i++) + { + result[LL_BAND][column + i] = QuantizeValue( LL[i], midpoints[LL_BAND], multipliers[LL_BAND] ); + result[LH_BAND][column + i] = QuantizeValue( LH[i], midpoints[LH_BAND], multipliers[LH_BAND] ); + result[HL_BAND][column + i] = QuantizeValue( HL[i], midpoints[HL_BAND], multipliers[HL_BAND] ); + result[HH_BAND][column + i] = QuantizeValue( HH[i], midpoints[HH_BAND], multipliers[HH_BAND] ); + } +} + +#define FilterVerticalTopBottom_8x FilterVerticalTopBottom_8x_C_ +void FilterVerticalTopBottom_8x_C_(PIXEL *lowpass[], PIXEL *highpass[], int column, int32_t* midpoints, int32_t* multipliers, PIXEL *result[], bool top ) +{ + int i; + + int16_t LL[8]; + int16_t HL[8]; + int16_t LH[8]; + int16_t HH[8]; + + FilterVerticalTopBottom_Core_8x_C_( highpass, column, HH, LH, top ); + FilterVerticalTopBottom_Core_8x_C_( lowpass, column, HL, LL, top ); + + for (i = 0; i < 8; i++) + { + result[LL_BAND][column + i] = QuantizeValue( LL[i], midpoints[LL_BAND], multipliers[LL_BAND] ); + result[LH_BAND][column + i] = QuantizeValue( LH[i], midpoints[LH_BAND], multipliers[LH_BAND] ); + result[HL_BAND][column + i] = QuantizeValue( HL[i], midpoints[HL_BAND], multipliers[HL_BAND] ); + result[HH_BAND][column + i] = QuantizeValue( HH[i], midpoints[HH_BAND], multipliers[HH_BAND] ); + } +} + +#endif + +static PIXEL HorizontalHighPassFilter_Middle(PIXEL *input, int prescale_rounding, int prescale) +{ + int32_t sum; + + if( prescale == 0 ) + { + sum = -input[0] - input[1] + (input[2] << 3) - (input[3] << 3) + input[4] + input[5]; + } + else + { + sum = 0; + sum -= (input[0] + prescale_rounding) >> prescale; + sum -= (input[1] + prescale_rounding) >> prescale; + sum += ((input[2] + prescale_rounding) >> prescale) << 3; + sum -= ((input[3] + prescale_rounding) >> prescale) << 3; + sum += (input[4] + prescale_rounding) >> prescale; + sum += (input[5] + prescale_rounding) >> prescale; + } + + sum += rounding; + sum = DivideByShift(sum, 3); + return ClampPixel(sum); +} + +static PIXEL HorizontalHighPassFilter(PIXEL *input, PIXEL *multipliers, int prescale_rounding, int prescale) +{ + int i; + int32_t sum = 0; + + if( prescale == 0 ) + { + for (i = 0; i < 6; i++) + { + sum += multipliers[i] * input[i]; + } + + } + else + { + for (i = 0; i < 6; i++) + { + sum += multipliers[i] * ( (input[i] + prescale_rounding) >> prescale); + } + } + + sum += rounding; + sum = DivideByShift(sum, 3); + return ClampPixel(sum); +} + +/*! + @brief Apply the horizontal wavelet filter to a row of pixels +*/ +CODEC_ERROR FilterHorizontalRow(PIXEL *input, PIXEL *lowpass, PIXEL *highpass, int width, int prescale) +{ + int column = 2; + + //uint16_t *input = (uint16_t *)input_buffer; + + //TODO: Check that the rounding is correct for all prescale values + int prescale_rounding = (1 << prescale) - 1; + + const int last_input_column = ((width % 2) == 0) ? width - 2 : width - 1; + + int last_input_column_tight = ( (last_input_column - 4) / 8) * 8; + + //TODO Test this routine with other prescale values + assert(prescale == 0 || prescale == 2); + + /***** Process the left border using the formula for boundary conditions *****/ + + // Compute the lowpass coefficient + lowpass[0] = (input[0] + input[1] + prescale_rounding) >> prescale; + + { + PIXEL coefficients[6] = { 5, -11, 4, 4, -1, -1 }; + highpass[0] = HorizontalHighPassFilter(input, coefficients, prescale_rounding, prescale ); + } + + if( prescale == 2 ) + { + /***** Process the internal pixels using the normal wavelet formula *****/ + for (; column < last_input_column_tight; column += 8) // + { + // Column index should always be divisible by two + assert((column % 2) == 0); + + HorizontalFilter_Prescale2_4x( input + column - 2, &lowpass[column/2], &highpass[column/2] ); + } + } + else if( prescale == 0 ) + { + /***** Process the internal pixels using the normal wavelet formula *****/ + for (; column < last_input_column_tight; column += 8) // + { + // Column index should always be divisible by two + assert((column % 2) == 0); + + HorizontalFilter_Prescale0_4x( input + column - 2, &lowpass[column/2], &highpass[column/2] ); + } + } + else + { + assert(0); + } + + for (; column < last_input_column; column += 2) + { + // Column index should always be divisible by two + assert((column % 2) == 0); + + // Compute the lowpass coefficient + lowpass[column/2] = (input[column + 0] + input[column + 1] + prescale_rounding) >> prescale; + + // Initialize the sum for computing the highpass coefficient + if ((column + 3) < width) + { + highpass[column/2] = HorizontalHighPassFilter_Middle(input + column - 2, prescale_rounding, prescale ); + } + else + { + int32_t sum = 0; + + sum -= (input[column - 2] + prescale_rounding) >> prescale; + sum -= (input[column - 1] + prescale_rounding) >> prescale; + sum += (input[column + 2] + prescale_rounding) >> prescale; + + // Duplicate the value in the last column + sum += (input[column + 2] + prescale_rounding) >> prescale; + sum += rounding; + sum = DivideByShift(sum, 3); + sum += (input[column + 0] + prescale_rounding) >> prescale; + sum -= (input[column + 1] + prescale_rounding) >> prescale; + highpass[column/2] = ClampPixel(sum); + } + } + + // Should have exited the loop at the last column + assert(column == last_input_column); + + /***** Process the right border using the formula for boundary conditions *****/ + + // Compute the lowpass coefficient + if ((column + 1) < width) + { + PIXEL coefficients[6] = { 1, 1, -4, -4, 11, -5 }; + highpass[column/2] = HorizontalHighPassFilter(input + column - 4, coefficients, prescale_rounding, prescale ); + + // Use the value in the last column + lowpass[column/2] = (input[column + 0] + input[column + 1] + prescale_rounding) >> prescale; + } + else + { + int32_t sum = 0; + // Duplicate the value in the last column + sum -= 5 * ((input[column + 0] + prescale_rounding) >> prescale); + + sum += 11 * ((input[column + 0] + prescale_rounding) >> prescale); + sum -= 4 * ((input[column - 1] + prescale_rounding) >> prescale); + sum -= 4 * ((input[column - 2] + prescale_rounding) >> prescale); + sum += 1 * ((input[column - 3] + prescale_rounding) >> prescale); + sum += 1 * ((input[column - 4] + prescale_rounding) >> prescale); + sum += rounding; + sum = DivideByShift(sum, 3); + + highpass[column/2] = ClampPixel(sum); + + // Duplicate the value in the last column + lowpass[column/2] = (input[column + 0] + input[column + 0] + prescale_rounding) >> prescale; + } + + return CODEC_ERROR_OKAY; +} + +static void FilterVerticalMiddle_1x(PIXEL *lowpass[], PIXEL *highpass[], int column, int32_t* midpoints, int32_t* multipliers, PIXEL *result[]) +{ + { + const PIXEL coefficients_01 = lowpass[0][column] + lowpass[1][column]; + const PIXEL coefficients_2 = lowpass[2][column]; + const PIXEL coefficients_3 = lowpass[3][column]; + const PIXEL coefficients_45 = lowpass[4][column] + lowpass[5][column]; + + int32_t sum = coefficients_45 - coefficients_01; + sum += 8 * (coefficients_2 - coefficients_3); + sum = DivideByShift(sum + rounding, 3); + + result[LL_BAND][column] = QuantizeValue( coefficients_2 + coefficients_3, midpoints[LL_BAND], multipliers[LL_BAND] ); + result[HL_BAND][column] = QuantizeValue( sum, midpoints[HL_BAND], multipliers[HL_BAND] ); + } + + { + const PIXEL coefficients_01 = highpass[0][column] + highpass[1][column]; + const PIXEL coefficients_2 = highpass[2][column]; + const PIXEL coefficients_3 = highpass[3][column]; + const PIXEL coefficients_45 = highpass[4][column] + highpass[5][column]; + + int32_t sum = coefficients_45 - coefficients_01; + sum += 8 * (coefficients_2 - coefficients_3); + sum = DivideByShift(sum + rounding, 3); + + result[LH_BAND][column] = QuantizeValue( coefficients_2 + coefficients_3, midpoints[LH_BAND], multipliers[LH_BAND] ); + result[HH_BAND][column] = QuantizeValue( sum, midpoints[HH_BAND], multipliers[HH_BAND] ); + } +} + +static void FilterVerticalTopBottom_1x(PIXEL *lowpass[], PIXEL *highpass[], int column, int32_t* midpoints, int32_t* multipliers, PIXEL *result[], bool top ) +{ + const int filter_coeffs_top[] = { 5, -11, 4, 4, -1, -1 }; + const int filter_coeffs_bottom[] = { 1, 1, -4, -4, 11, -5 }; + + int low_band_index = top ? 0 : 4; + const int* filter_coeffs = top ? filter_coeffs_top : filter_coeffs_bottom; + + int i; + int32_t sum_L = 0; + int32_t sum_H = 0; + + // Apply the lowpass vertical filter to the lowpass horizontal results + result[LL_BAND][column] = QuantizeValue( lowpass[low_band_index + 0][column] + lowpass[low_band_index + 1][column], midpoints[LL_BAND], multipliers[LL_BAND] ); + + // Apply the lowpass vertical filter to the highpass horizontal results + result[LH_BAND][column] = QuantizeValue( highpass[low_band_index + 0][column] + highpass[low_band_index + 1][column], midpoints[LH_BAND], multipliers[LH_BAND] ); + + for (i = 0; i < 6; i++) + { + sum_L += filter_coeffs[i] * lowpass[i][column]; + sum_H += filter_coeffs[i] * highpass[i][column]; + } + + sum_L += rounding; + sum_L = DivideByShift(sum_L, 3); + result[HL_BAND][column] = QuantizeValue( sum_L, midpoints[HL_BAND], multipliers[HL_BAND] ); + + sum_H += rounding; + sum_H = DivideByShift(sum_H, 3); + result[HH_BAND][column] = QuantizeValue( sum_H, midpoints[HH_BAND], multipliers[HH_BAND] ); +} + +/*! + @brief Apply the vertical wavelet filter to the first row + + This routine uses the wavelet formulas for the top row of an image + + The midpoint prequant argument is not the offset that is added to the + value prior to quantization. It is a setting indicating which midpoint + offset to use. + + @todo Change the midpoint_prequant argument to midpoint_setting? +*/ +CODEC_ERROR FilterVerticalTopRow(PIXEL **lowpass, PIXEL **highpass, PIXEL **output, int wavelet_width, int wavelet_pitch, int32_t midpoints[], int32_t multipliers[], int input_row ) +{ + int column = 0; + const int wavelet_width_m8 = (wavelet_width / 8) * 8; + + assert(input_row == 0); + + for (; column < wavelet_width_m8; column += 8) + { + FilterVerticalTopBottom_8x( lowpass, highpass, column, midpoints, multipliers, output, true ); + + assert(output[LL_BAND][column] >= 0); + } + + for (; column < wavelet_width; column++) + { + FilterVerticalTopBottom_1x( lowpass, highpass, column, midpoints, multipliers, output, true ); + + assert(output[LL_BAND][column] >= 0); + } + + return CODEC_ERROR_OKAY; +} + + +CODEC_ERROR FilterVerticalBottomRow(PIXEL **lowpass, PIXEL **highpass, PIXEL **output, int wavelet_width, int wavelet_pitch, int32_t midpoints[], int32_t multipliers[], int input_row ) +{ + PIXEL *result[MAX_BAND_COUNT]; + int column = 0; + const int wavelet_width_m8 = (wavelet_width / 8) * 8; + + int band; + + //uint16_t **lowpass = (uint16_t **)lowpass_buffer; + + int output_row = input_row / 2; + + // Compute the address of each output row + for (band = 0; band < MAX_BAND_COUNT; band++) + { + uint8_t *band_row_ptr = (uint8_t *)output[band]; + band_row_ptr += output_row * wavelet_pitch; + result[band] = (PIXEL *)band_row_ptr; + } + + for (; column < wavelet_width_m8; column += 8) + { + FilterVerticalTopBottom_8x( lowpass, highpass, column, midpoints, multipliers, result, false ); + + assert(result[LL_BAND][column] >= 0); + } + + for (; column < wavelet_width; column++) + { + FilterVerticalTopBottom_1x( lowpass, highpass, column, midpoints, multipliers, result, false ); + + assert(result[LL_BAND][column] >= 0); + } + + return CODEC_ERROR_OKAY; +} + +/*! + @brief Apply the vertical wavelet filter to a middle row + + This routine uses the wavelet formulas for the middle rows of an image +*/ +CODEC_ERROR FilterVerticalMiddleRow(PIXEL **lowpass, PIXEL **highpass, PIXEL **output, int wavelet_width, int wavelet_pitch, int32_t midpoints[], int32_t multipliers[], int input_row ) +{ + PIXEL *result[MAX_BAND_COUNT]; + + int column = 0; + int band; + + const int wavelet_width_m8 = (wavelet_width / 8) * 8; + + //uint16_t **lowpass = (uint16_t **)lowpass_buffer; + + int output_row = input_row / 2; + + // Compute the address of each output row + for (band = 0; band < MAX_BAND_COUNT; band++) + { + uint8_t *band_row_ptr = (uint8_t *)output[band]; + band_row_ptr += output_row * wavelet_pitch; + result[band] = (PIXEL *)band_row_ptr; + } + + for (; column < wavelet_width_m8; column += 8) + { + FilterVerticalMiddle_8x(lowpass, highpass, column, midpoints, multipliers, result); + } + + for (; column < wavelet_width; column += 1) + { + FilterVerticalMiddle_1x(lowpass, highpass, column, midpoints, multipliers, result); + + assert(result[LL_BAND][column] >= 0); + } + + return CODEC_ERROR_OKAY; +} + |