/* Fill h->mb.cache.deblock_strength (bs) for the current macroblock.
 *
 * Two cases write constant strengths and return immediately: intra
 * macroblocks, and non-4:4:4 macroblocks using the 8x8 transform whose luma
 * cbp has every bit of cbp_mask set.  Otherwise the neighbour entries of the
 * non_zero_count / ref / mv caches are brought up to date and the actual
 * computation is delegated to h->loopf.deblock_strength(), followed by
 * macroblock_deblock_strength_mbaff() when SLICE_MBAFF is set.
 *
 * Side effects besides bs: may overwrite h->mb.i_neighbour,
 * h->mb.i_mb_left_xy[] and entries of h->mb.cache.non_zero_count,
 * h->mb.cache.ref and h->mb.cache.mv. */
void x264_macroblock_deblock_strength( x264_t *h )
{
uint8_t (*bs)[8][4] = h->mb.cache.deblock_strength;
if( IS_INTRA( h->mb.i_type ) )
{
/* Strength 3 for rows 1..3 of both bs[0] and bs[1] (the M64 store presumably
 * spans two consecutive 4-byte rows).  Row 0 is not written on this path. */
M32( bs[0][1] ) = 0x03030303;
M64( bs[0][2] ) = 0x0303030303030303ULL;
M32( bs[1][1] ) = 0x03030303;
M64( bs[1][2] ) = 0x0303030303030303ULL;
return;
}
/* Early termination: in this case, nnz guarantees all edges use strength 2. */
if( h->mb.b_transform_8x8 && !CHROMA444 )
{
/* Only the low (4 >> CHROMA_V_SHIFT) bits of the luma cbp have to be set. */
int cbp_mask = 0xf >> CHROMA_V_SHIFT;
if( (h->mb.i_cbp_luma&cbp_mask) == cbp_mask )
{
M32( bs[0][0] ) = 0x02020202;
M32( bs[0][2] ) = 0x02020202;
M32( bs[0][4] ) = 0x02020202;
M64( bs[1][0] ) = 0x0202020202020202ULL; /* [1][1] and [1][3] have to be set for 4:2:2 */
M64( bs[1][2] ) = 0x0202020202020202ULL;
M32( bs[1][4] ) = 0x02020202;
return;
}
}
/* Neighbour flags present in i_neighbour_frame but missing from i_neighbour.
 * Only computed (and i_neighbour only replaced by the frame-wide mask) when
 * i_disable_deblocking_filter_idc is not 2. */
int neighbour_changed = 0;
if( h->sh.i_disable_deblocking_filter_idc != 2 )
{
neighbour_changed = h->mb.i_neighbour_frame&~h->mb.i_neighbour;
h->mb.i_neighbour = h->mb.i_neighbour_frame;
}
/* MBAFF deblock uses different left neighbors from encoding */
if( SLICE_MBAFF && (h->mb.i_neighbour & MB_LEFT) && (h->mb.field[h->mb.i_mb_xy - 1] != MB_INTERLACED) )
{
/* Start with both left entries on the MB directly to the left, then move
 * one of them by a full MB row: entry 0 up when i_mb_y is odd, entry 1
 * down when it is even. */
h->mb.i_mb_left_xy[1] =
h->mb.i_mb_left_xy[0] = h->mb.i_mb_xy - 1;
if( h->mb.i_mb_y&1 )
h->mb.i_mb_left_xy[0] -= h->mb.i_mb_stride;
else
h->mb.i_mb_left_xy[1] += h->mb.i_mb_stride;
}
/* If we have multiple slices and we're deblocking on slice edges, we
 * have to reload neighbour data. */
if( neighbour_changed )
{
int top_y = h->mb.i_mb_top_y;
/* Offsets into the frame-wide ref (8x8 units) and mv (4x4 units) arrays:
 * row 2*top_y+1 resp. 4*top_y+3, i.e. the last block row of the MB at row
 * top_y, at this MB's horizontal position. */
int top_8x8 = (2*top_y+1) * h->mb.i_b8_stride + 2*h->mb.i_mb_x;
int top_4x4 = (4*top_y+3) * h->mb.i_b4_stride + 4*h->mb.i_mb_x;
int s8x8 = h->mb.i_b8_stride;
int s4x4 = h->mb.i_b4_stride;
uint8_t (*nnz)[48] = h->mb.non_zero_count;
const x264_left_table_t *left_index_table = SLICE_MBAFF ? h->mb.left_index_table : &left_indices[3];
/* Cache row just above the current MB <- 4 entries starting at index 12 of
 * the top MB's nnz array. */
if( neighbour_changed & MB_TOP )
CP32( &h->mb.cache.non_zero_count[x264_scan8[0] - 8], &nnz[h->mb.i_mb_top_xy][12] );
if( neighbour_changed & MB_LEFT )
{
/* Cache column just left of blocks 0, 2, 8 and 10: the first two entries
 * come from left[0], the last two from left[1]; the source index inside
 * each left MB is given by left_index_table->nnz[]. */
int *left = h->mb.i_mb_left_xy;
h->mb.cache.non_zero_count[x264_scan8[0 ] - 1] = nnz[left[0]][left_index_table->nnz[0]];
h->mb.cache.non_zero_count[x264_scan8[2 ] - 1] = nnz[left[0]][left_index_table->nnz[1]];
h->mb.cache.non_zero_count[x264_scan8[8 ] - 1] = nnz[left[1]][left_index_table->nnz[2]];
h->mb.cache.non_zero_count[x264_scan8[10] - 1] = nnz[left[1]][left_index_table->nnz[3]];
}
/* List 0 always; list 1 as well for B slices. */
for( int l = 0; l <= (h->sh.i_type == SLICE_TYPE_B); l++ )
{
int16_t (*mv)[2] = h->mb.mv[l];
int8_t *ref = h->mb.ref[l];
int i8 = x264_scan8[0] - 8;
if( neighbour_changed & MB_TOP )
{
/* Each 8x8 ref of the top row is duplicated into two adjacent cache
 * entries; the top mvs are copied with a single CP128. */
h->mb.cache.ref[l][i8+0] =
h->mb.cache.ref[l][i8+1] = ref[top_8x8 + 0];
h->mb.cache.ref[l][i8+2] =
h->mb.cache.ref[l][i8+3] = ref[top_8x8 + 1];
CP128( h->mb.cache.mv[l][i8], mv[top_4x4] );
}
i8 = x264_scan8[0] - 1;
if( neighbour_changed & MB_LEFT )
{
/* Left column: one ref per pair of cache rows (column offset +1 from
 * left_b8[]), one mv per cache row (column offset +3 from left_b4[]);
 * the row offsets come from left_index_table. */
h->mb.cache.ref[l][i8+0*8] =
h->mb.cache.ref[l][i8+1*8] = ref[h->mb.left_b8[0] + 1 + s8x8*left_index_table->ref[0]];
h->mb.cache.ref[l][i8+2*8] =
h->mb.cache.ref[l][i8+3*8] = ref[h->mb.left_b8[1] + 1 + s8x8*left_index_table->ref[2]];
CP32( h->mb.cache.mv[l][i8+0*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table->mv[0]] );
CP32( h->mb.cache.mv[l][i8+1*8], mv[h->mb.left_b4[0] + 3 + s4x4*left_index_table->mv[1]] );
CP32( h->mb.cache.mv[l][i8+2*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table->mv[2]] );
CP32( h->mb.cache.mv[l][i8+3*8], mv[h->mb.left_b4[1] + 3 + s4x4*left_index_table->mv[3]] );
}
}
}
if( h->param.analyse.i_weighted_pred == X264_WEIGHTP_SMART && h->sh.i_type == SLICE_TYPE_P )
{
/* Handle reference frame duplicates */
/* Every list-0 ref the strength function will look at is passed through
 * deblock_ref_table().  For the neighbours only the first entry of each
 * pair is read and the mapped value is stored to both entries of the pair.
 * NOTE(review): this relies on both entries of a pair holding the same
 * value beforehand — confirm against the cache loader. */
int i8 = x264_scan8[0] - 8;
h->mb.cache.ref[0][i8+0] =
h->mb.cache.ref[0][i8+1] = deblock_ref_table(h->mb.cache.ref[0][i8+0]);
h->mb.cache.ref[0][i8+2] =
h->mb.cache.ref[0][i8+3] = deblock_ref_table(h->mb.cache.ref[0][i8+2]);
i8 = x264_scan8[0] - 1;
h->mb.cache.ref[0][i8+0*8] =
h->mb.cache.ref[0][i8+1*8] = deblock_ref_table(h->mb.cache.ref[0][i8+0*8]);
h->mb.cache.ref[0][i8+2*8] =
h->mb.cache.ref[0][i8+3*8] = deblock_ref_table(h->mb.cache.ref[0][i8+2*8]);
/* Current MB: one mapped ref per 8x8 block, read at blocks 0, 4, 8, 12. */
int ref0 = deblock_ref_table(h->mb.cache.ref[0][x264_scan8[ 0]]);
int ref1 = deblock_ref_table(h->mb.cache.ref[0][x264_scan8[ 4]]);
int ref2 = deblock_ref_table(h->mb.cache.ref[0][x264_scan8[ 8]]);
int ref3 = deblock_ref_table(h->mb.cache.ref[0][x264_scan8[12]]);
/* The multiply by 0x0101 copies each byte of the packed pair into the next
 * higher byte, so one 32-bit store fills a whole 4-entry cache row
 * (presumably two entries per 8x8 block; exact byte order depends on
 * pack16to32). */
uint32_t reftop = pack16to32( (uint8_t)ref0, (uint8_t)ref1 ) * 0x0101;
uint32_t refbot = pack16to32( (uint8_t)ref2, (uint8_t)ref3 ) * 0x0101;
M32( &h->mb.cache.ref[0][x264_scan8[0]+8*0] ) = reftop;
M32( &h->mb.cache.ref[0][x264_scan8[0]+8*1] ) = reftop;
M32( &h->mb.cache.ref[0][x264_scan8[0]+8*2] ) = refbot;
M32( &h->mb.cache.ref[0][x264_scan8[0]+8*3] ) = refbot;
}
/* Munge NNZ for cavlc + 8x8dct */
if( !h->param.b_cabac && h->pps->b_transform_8x8_mode )
{
uint8_t (*nnz)[48] = h->mb.non_zero_count;
int top = h->mb.i_mb_top_xy;
int *left = h->mb.i_mb_left_xy;
/* For a neighbour whose mb_transform_size entry is set, the cached nnz
 * values along the shared edge are replaced by a 0/1 flag per pair of
 * entries: 1 if any of the four stored nnz bytes read for that pair is
 * non-zero. */
if( (h->mb.i_neighbour & MB_TOP) && h->mb.mb_transform_size[top] )
{
int i8 = x264_scan8[0] - 8;
int nnz_top0 = M16( &nnz[top][8] ) | M16( &nnz[top][12] );
int nnz_top1 = M16( &nnz[top][10] ) | M16( &nnz[top][14] );
M16( &h->mb.cache.non_zero_count[i8+0] ) = nnz_top0 ? 0x0101 : 0;
M16( &h->mb.cache.non_zero_count[i8+2] ) = nnz_top1 ? 0x0101 : 0;
}
if( h->mb.i_neighbour & MB_LEFT )
{
int i8 = x264_scan8[0] - 1;
/* The two left entries are tested separately; left[0] feeds cache rows
 * 0-1 and left[1] feeds cache rows 2-3. */
if( h->mb.mb_transform_size[left[0]] )
{
int nnz_left0 = M16( &nnz[left[0]][2] ) | M16( &nnz[left[0]][6] );
h->mb.cache.non_zero_count[i8+8*0] = !!nnz_left0;
h->mb.cache.non_zero_count[i8+8*1] = !!nnz_left0;
}
if( h->mb.mb_transform_size[left[1]] )
{
int nnz_left1 = M16( &nnz[left[1]][10] ) | M16( &nnz[left[1]][14] );
h->mb.cache.non_zero_count[i8+8*2] = !!nnz_left1;
h->mb.cache.non_zero_count[i8+8*3] = !!nnz_left1;
}
}
if( h->mb.b_transform_8x8 )
{
/* Same collapse for the current MB: nnz0..nnz3 each OR together four
 * cached nnz bytes (two 16-bit reads), and the resulting 0/1 flags are
 * written back over all four cache rows of the MB. */
int nnz0 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 2]] );
int nnz1 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 4]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[ 6]] );
int nnz2 = M16( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[10]] );
int nnz3 = M16( &h->mb.cache.non_zero_count[x264_scan8[12]] ) | M16( &h->mb.cache.non_zero_count[x264_scan8[14]] );
uint32_t nnztop = pack16to32( !!nnz0, !!nnz1 ) * 0x0101;
uint32_t nnzbot = pack16to32( !!nnz2, !!nnz3 ) * 0x0101;
M32( &h->mb.cache.non_zero_count[x264_scan8[0]+8*0] ) = nnztop;
M32( &h->mb.cache.non_zero_count[x264_scan8[0]+8*1] ) = nnztop;
M32( &h->mb.cache.non_zero_count[x264_scan8[0]+8*2] ) = nnzbot;
M32( &h->mb.cache.non_zero_count[x264_scan8[0]+8*3] ) = nnzbot;
}
}
/* Compute the strengths from the (possibly rewritten) caches.  The fifth
 * argument is 4, or 2 when MB_INTERLACED is 1; the last argument is non-zero
 * for B slices. */
h->loopf.deblock_strength( h->mb.cache.non_zero_count, h->mb.cache.ref, h->mb.cache.mv,
bs, 4 >> MB_INTERLACED, h->sh.i_type == SLICE_TYPE_B );
/* MBAFF slices get an additional pass over bs. */
if( SLICE_MBAFF )
macroblock_deblock_strength_mbaff( h, bs );
}