x264 source for verification 2026-05-22
This commit is contained in:
3895
encoder/analyse.c
Normal file
3895
encoder/analyse.c
Normal file
File diff suppressed because it is too large
Load Diff
55
encoder/analyse.h
Normal file
55
encoder/analyse.h
Normal file
@@ -0,0 +1,55 @@
|
||||
/*****************************************************************************
|
||||
* analyse.h: macroblock analysis
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2003-2025 x264 project
|
||||
*
|
||||
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
|
||||
* Loren Merritt <lorenm@u.washington.edu>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X264_ENCODER_ANALYSE_H
|
||||
#define X264_ENCODER_ANALYSE_H
|
||||
|
||||
#define x264_analyse_init_costs x264_template(analyse_init_costs)
|
||||
int x264_analyse_init_costs( x264_t *h );
|
||||
#define x264_analyse_free_costs x264_template(analyse_free_costs)
|
||||
void x264_analyse_free_costs( x264_t *h );
|
||||
#define x264_analyse_weight_frame x264_template(analyse_weight_frame)
|
||||
void x264_analyse_weight_frame( x264_t *h, int end );
|
||||
#define x264_macroblock_analyse x264_template(macroblock_analyse)
|
||||
void x264_macroblock_analyse( x264_t *h );
|
||||
#define x264_slicetype_decide x264_template(slicetype_decide)
|
||||
void x264_slicetype_decide( x264_t *h );
|
||||
|
||||
#define x264_slicetype_analyse x264_template(slicetype_analyse)
|
||||
void x264_slicetype_analyse( x264_t *h, int intra_minigop );
|
||||
|
||||
#define x264_lookahead_init x264_template(lookahead_init)
|
||||
int x264_lookahead_init( x264_t *h, int i_slicetype_length );
|
||||
#define x264_lookahead_is_empty x264_template(lookahead_is_empty)
|
||||
int x264_lookahead_is_empty( x264_t *h );
|
||||
#define x264_lookahead_put_frame x264_template(lookahead_put_frame)
|
||||
void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame );
|
||||
#define x264_lookahead_get_frames x264_template(lookahead_get_frames)
|
||||
void x264_lookahead_get_frames( x264_t *h );
|
||||
#define x264_lookahead_delete x264_template(lookahead_delete)
|
||||
void x264_lookahead_delete( x264_t *h );
|
||||
|
||||
#endif
|
||||
199
encoder/api.c
Normal file
199
encoder/api.c
Normal file
@@ -0,0 +1,199 @@
|
||||
/*****************************************************************************
|
||||
* api.c: bit depth independent interface
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2003-2025 x264 project
|
||||
*
|
||||
* Authors: Vittorio Giovara <vittorio.giovara@gmail.com>
|
||||
* Luca Barbato <lu_zero@gentoo.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common/base.h"
|
||||
|
||||
/****************************************************************************
|
||||
* global symbols
|
||||
****************************************************************************/
|
||||
/* Externally visible constant initialised from the X264_CHROMA_FORMAT macro,
 * which is not defined in the visible part of this file. */
const int x264_chroma_format = X264_CHROMA_FORMAT;
|
||||
|
||||
x264_t *x264_8_encoder_open( x264_param_t *, void * );
|
||||
void x264_8_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
|
||||
int x264_8_encoder_reconfig( x264_t *, x264_param_t * );
|
||||
void x264_8_encoder_parameters( x264_t *, x264_param_t * );
|
||||
int x264_8_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
|
||||
int x264_8_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
|
||||
void x264_8_encoder_close( x264_t * );
|
||||
int x264_8_encoder_delayed_frames( x264_t * );
|
||||
int x264_8_encoder_maximum_delayed_frames( x264_t * );
|
||||
void x264_8_encoder_intra_refresh( x264_t * );
|
||||
int x264_8_encoder_invalidate_reference( x264_t *, int64_t pts );
|
||||
|
||||
x264_t *x264_10_encoder_open( x264_param_t *, void * );
|
||||
void x264_10_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal );
|
||||
int x264_10_encoder_reconfig( x264_t *, x264_param_t * );
|
||||
void x264_10_encoder_parameters( x264_t *, x264_param_t * );
|
||||
int x264_10_encoder_headers( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
|
||||
int x264_10_encoder_encode( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
|
||||
void x264_10_encoder_close( x264_t * );
|
||||
int x264_10_encoder_delayed_frames( x264_t * );
|
||||
int x264_10_encoder_maximum_delayed_frames( x264_t * );
|
||||
void x264_10_encoder_intra_refresh( x264_t * );
|
||||
int x264_10_encoder_invalidate_reference( x264_t *, int64_t pts );
|
||||
|
||||
typedef struct x264_api_t
|
||||
{
|
||||
/* Internal reference to x264_t data */
|
||||
x264_t *x264;
|
||||
|
||||
/* API entry points */
|
||||
void (*nal_encode)( x264_t *h, uint8_t *dst, x264_nal_t *nal );
|
||||
int (*encoder_reconfig)( x264_t *, x264_param_t * );
|
||||
void (*encoder_parameters)( x264_t *, x264_param_t * );
|
||||
int (*encoder_headers)( x264_t *, x264_nal_t **pp_nal, int *pi_nal );
|
||||
int (*encoder_encode)( x264_t *, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out );
|
||||
void (*encoder_close)( x264_t * );
|
||||
int (*encoder_delayed_frames)( x264_t * );
|
||||
int (*encoder_maximum_delayed_frames)( x264_t * );
|
||||
void (*encoder_intra_refresh)( x264_t * );
|
||||
int (*encoder_invalidate_reference)( x264_t *, int64_t pts );
|
||||
} x264_api_t;
|
||||
|
||||
/* Create an encoder handle for the bit depth requested in param->i_bitdepth.
 *
 * A zero-initialised x264_api_t is allocated, its function table is filled
 * with the 8-bit or 10-bit entry points (whichever were compiled in and match
 * param->i_bitdepth), and the matching x264_N_encoder_open() is called with
 * `param` and the api record.
 *
 * Returns the api record cast to x264_t * (the handle is opaque to callers),
 * or NULL when the allocation fails, when the requested bit depth was not
 * compiled in (an error is logged), or when the underlying open returns NULL.
 * `param` is dereferenced without a NULL check. */
REALIGN_STACK x264_t *x264_encoder_open( x264_param_t *param )
{
    x264_api_t *api = calloc( 1, sizeof( x264_api_t ) );
    if( !api )
        return NULL;

    /* The two conditional sections form a single if / else-if / else chain:
     * each trailing "else" binds to whatever statement follows its #endif. */
#if HAVE_BITDEPTH8
    if( param->i_bitdepth == 8 )
    {
        api->nal_encode = x264_8_nal_encode;
        api->encoder_reconfig = x264_8_encoder_reconfig;
        api->encoder_parameters = x264_8_encoder_parameters;
        api->encoder_headers = x264_8_encoder_headers;
        api->encoder_encode = x264_8_encoder_encode;
        api->encoder_close = x264_8_encoder_close;
        api->encoder_delayed_frames = x264_8_encoder_delayed_frames;
        api->encoder_maximum_delayed_frames = x264_8_encoder_maximum_delayed_frames;
        api->encoder_intra_refresh = x264_8_encoder_intra_refresh;
        api->encoder_invalidate_reference = x264_8_encoder_invalidate_reference;

        api->x264 = x264_8_encoder_open( param, api );
    }
    else
#endif
#if HAVE_BITDEPTH10
    if( param->i_bitdepth == 10 )
    {
        api->nal_encode = x264_10_nal_encode;
        api->encoder_reconfig = x264_10_encoder_reconfig;
        api->encoder_parameters = x264_10_encoder_parameters;
        api->encoder_headers = x264_10_encoder_headers;
        api->encoder_encode = x264_10_encoder_encode;
        api->encoder_close = x264_10_encoder_close;
        api->encoder_delayed_frames = x264_10_encoder_delayed_frames;
        api->encoder_maximum_delayed_frames = x264_10_encoder_maximum_delayed_frames;
        api->encoder_intra_refresh = x264_10_encoder_intra_refresh;
        api->encoder_invalidate_reference = x264_10_encoder_invalidate_reference;

        api->x264 = x264_10_encoder_open( param, api );
    }
    else
#endif
        x264_log_internal( X264_LOG_ERROR, "not compiled with %d bit depth support\n", param->i_bitdepth );

    /* calloc zeroed the record, so api->x264 is still NULL on the
     * unsupported-bit-depth path as well as when the open call failed. */
    if( !api->x264 )
    {
        free( api );
        return NULL;
    }

    /* x264_t is opaque */
    return (x264_t *)api;
}
|
||||
|
||||
/* Close the encoder behind `h` and release the handle.
 *
 * `h` must be a handle returned by x264_encoder_open(): it is cast back to
 * x264_api_t * and dereferenced without a NULL check.  The bit-depth-specific
 * close routine is called on the inner encoder first, then the dispatch
 * record itself is freed, so `h` must not be used afterwards. */
REALIGN_STACK void x264_encoder_close( x264_t *h )
{
    x264_api_t *api = (x264_api_t *)h;

    api->encoder_close( api->x264 );
    free( api );
}
|
||||
|
||||
/* Forward to the nal_encode entry point stored in the handle.
 * `h` must come from x264_encoder_open(); `dst` and `nal` are passed through
 * unchanged. */
REALIGN_STACK void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal )
{
    x264_api_t *api = (x264_api_t *)h;

    api->nal_encode( api->x264, dst, nal );
}
|
||||
|
||||
/* Forward to the encoder_reconfig entry point stored in the handle and
 * return its result.  `h` must come from x264_encoder_open(). */
REALIGN_STACK int x264_encoder_reconfig( x264_t *h, x264_param_t *param)
{
    x264_api_t *api = (x264_api_t *)h;

    return api->encoder_reconfig( api->x264, param );
}
|
||||
|
||||
/* Forward to the encoder_parameters entry point stored in the handle.
 * `h` must come from x264_encoder_open(); `param` is passed through. */
REALIGN_STACK void x264_encoder_parameters( x264_t *h, x264_param_t *param )
{
    x264_api_t *api = (x264_api_t *)h;

    api->encoder_parameters( api->x264, param );
}
|
||||
|
||||
/* Forward to the encoder_headers entry point stored in the handle and return
 * its result.  `h` must come from x264_encoder_open(); `pp_nal` and `pi_nal`
 * are passed through unchanged. */
REALIGN_STACK int x264_encoder_headers( x264_t *h, x264_nal_t **pp_nal, int *pi_nal )
{
    x264_api_t *api = (x264_api_t *)h;

    return api->encoder_headers( api->x264, pp_nal, pi_nal );
}
|
||||
|
||||
/* Forward to the encoder_encode entry point stored in the handle and return
 * its result.  `h` must come from x264_encoder_open(); all other arguments
 * are passed through unchanged. */
REALIGN_STACK int x264_encoder_encode( x264_t *h, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_in, x264_picture_t *pic_out )
{
    x264_api_t *api = (x264_api_t *)h;

    return api->encoder_encode( api->x264, pp_nal, pi_nal, pic_in, pic_out );
}
|
||||
|
||||
/* Forward to the encoder_delayed_frames entry point stored in the handle and
 * return its result.  `h` must come from x264_encoder_open(). */
REALIGN_STACK int x264_encoder_delayed_frames( x264_t *h )
{
    x264_api_t *api = (x264_api_t *)h;

    return api->encoder_delayed_frames( api->x264 );
}
|
||||
|
||||
/* Forward to the encoder_maximum_delayed_frames entry point stored in the
 * handle and return its result.  `h` must come from x264_encoder_open(). */
REALIGN_STACK int x264_encoder_maximum_delayed_frames( x264_t *h )
{
    x264_api_t *api = (x264_api_t *)h;

    return api->encoder_maximum_delayed_frames( api->x264 );
}
|
||||
|
||||
/* Forward to the encoder_intra_refresh entry point stored in the handle.
 * `h` must come from x264_encoder_open(). */
REALIGN_STACK void x264_encoder_intra_refresh( x264_t *h )
{
    x264_api_t *api = (x264_api_t *)h;

    api->encoder_intra_refresh( api->x264 );
}
|
||||
|
||||
/* Forward to the encoder_invalidate_reference entry point stored in the
 * handle and return its result.  `h` must come from x264_encoder_open();
 * `pts` is passed through unchanged. */
REALIGN_STACK int x264_encoder_invalidate_reference( x264_t *h, int64_t pts )
{
    x264_api_t *api = (x264_api_t *)h;

    return api->encoder_invalidate_reference( api->x264, pts );
}
|
||||
1239
encoder/cabac.c
Normal file
1239
encoder/cabac.c
Normal file
File diff suppressed because it is too large
Load Diff
722
encoder/cavlc.c
Normal file
722
encoder/cavlc.c
Normal file
@@ -0,0 +1,722 @@
|
||||
/*****************************************************************************
|
||||
* cavlc.c: cavlc bitstream writing
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2003-2025 x264 project
|
||||
*
|
||||
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
|
||||
* Loren Merritt <lorenm@u.washington.edu>
|
||||
* Fiona Glaser <fiona@x264.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common/common.h"
|
||||
#include "macroblock.h"
|
||||
|
||||
/* Default RDO_SKIP_BS to 0 unless it was already defined before this point.
 * The "#if RDO_SKIP_BS" sections in this file switch between accumulating
 * s->i_bits_encoded (non-zero) and updating encoder state such as
 * h->mb.b_overflow or h->stat.frame (zero). */
#ifndef RDO_SKIP_BS
#define RDO_SKIP_BS 0
#endif
|
||||
|
||||
/* Coded-block-pattern to ue(v) code number.
 *
 * Indexed as [chroma][intra][(cbp_chroma << 4) | cbp_luma], matching the
 * lookup in x264_macroblock_write_cavlc(): the first index is 1 when the
 * chroma format is 4:2:0 or 4:2:2 and 0 otherwise, the second is
 * IS_INTRA(mb_type).  The chroma == 0 rows only list 16 values; the remaining
 * 32 entries are implicitly zero. */
/* [400,420][inter,intra] */
static const uint8_t cbp_to_golomb[2][2][48] =
{
    {{ 0,  1,  2,  5,  3,  6, 14, 10,  4, 15,  7, 11,  8, 12, 13,  9 },
     { 1, 10, 11,  6, 12,  7, 14,  2, 13, 15,  8,  3,  9,  4,  5,  0 }},
    {{ 0,  2,  3,  7,  4,  8, 17, 13,  5, 18,  9, 14, 10, 15, 16, 11,
       1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19,
       6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12 },
     { 3, 29, 30, 17, 31, 18, 37,  8, 32, 38, 19,  9, 20, 10, 11,  2,
      16, 33, 34, 21, 35, 22, 39,  4, 36, 40, 23,  5, 24,  6,  7,  1,
      41, 42, 43, 25, 44, 26, 46, 12, 45, 47, 27, 13, 28, 14, 15,  0 }}
};
|
||||
|
||||
/* B-slice macroblock type to ue(v) code number.
 *
 * Indexed as [h->mb.i_partition - D_16x8][i_mb_type - B_L0_L0] by
 * cavlc_mb_header_b().  The -1 entries in the D_16x16 row are stored as 255
 * in the uint8_t table; they presumably mark type/partition combinations
 * that never occur -- confirm against the macroblock type enum. */
static const uint8_t mb_type_b_to_golomb[3][9]=
{
    { 4,  8, 12, 10,  6, 14, 16, 18, 20 }, /* D_16x8 */
    { 5,  9, 13, 11,  7, 15, 17, 19, 21 }, /* D_8x16 */
    { 1, -1, -1, -1,  2, -1, -1, -1,  3 }  /* D_16x16 */
};
|
||||
|
||||
/* P-slice sub-partition type to ue(v) code number, indexed by
 * h->mb.i_sub_partition[i] in cavlc_mb_header_p(). */
static const uint8_t subpartition_p_to_golomb[4]=
{
    3, 1, 2, 0
};
|
||||
|
||||
/* B-slice sub-partition type to ue(v) code number, indexed by
 * h->mb.i_sub_partition[i] in cavlc_mb_header_b(). */
static const uint8_t subpartition_b_to_golomb[13]=
{
    10,  4,  5,  1, 11,  6,  7,  2, 12,  8,  9,  3,  0
};
|
||||
|
||||
/* Write one VLC table entry: passes the entry's i_size and i_bits fields to
 * bs_write().  Note that `v` is expanded twice, so it should be free of side
 * effects. */
#define bs_write_vlc(s,v) bs_write( s, (v).i_size, (v).i_bits )
|
||||
|
||||
/****************************************************************************
|
||||
* x264_cavlc_block_residual:
|
||||
****************************************************************************/
|
||||
/* Write one coefficient level that the caller could not take from the
 * x264_level_token table, and return the suffix length to use for the next
 * level.
 *
 *   h               encoder state; output goes to h->out.bs
 *   i_suffix_length current suffix length (indexes next_suffix, so 0..5 on entry)
 *   level           signed coefficient level to encode
 *
 * If the level needs a prefix longer than 15 and the stream profile is below
 * PROFILE_HIGH, the level cannot be represented: with RDO_SKIP_BS the bit
 * count is penalised, otherwise h->mb.b_overflow is set.  In both cases the
 * (truncated) code is still written. */
static inline int cavlc_block_residual_escape( x264_t *h, int i_suffix_length, int level )
{
    bs_t *s = &h->out.bs;
    static const uint16_t next_suffix[7] = { 0, 3, 6, 12, 24, 48, 0xffff };
    int i_level_prefix = 15;
    /* mask is 0 for level >= 0 and -1 for level < 0 (relies on an arithmetic
     * right shift of a negative int), giving a branchless abs(). */
    int mask = level >> 31;
    int abs_level = (level^mask)-mask;
    /* 2*abs-2 for positive levels, 2*abs-1 for negative ones */
    int i_level_code = abs_level*2-mask-2;
    if( ( i_level_code >> i_suffix_length ) < 15 )
    {
        bs_write( s, (i_level_code >> i_suffix_length) + 1 + i_suffix_length,
                 (1<<i_suffix_length) + (i_level_code & ((1<<i_suffix_length)-1)) );
    }
    else
    {
        i_level_code -= 15 << i_suffix_length;
        if( i_suffix_length == 0 )
            i_level_code -= 15;

        /* If the prefix size exceeds 15, High Profile is required. */
        if( i_level_code >= 1<<12 )
        {
            if( h->sps->i_profile_idc >= PROFILE_HIGH )
            {
                while( i_level_code >= 1<<(i_level_prefix-3) )
                {
                    i_level_code -= 1<<(i_level_prefix-3);
                    i_level_prefix++;
                }
            }
            else
            {
#if RDO_SKIP_BS
                /* Weight highly against overflows. */
                s->i_bits_encoded += 2000;
#else
                /* We've had an overflow; note it down and re-encode the MB later. */
                h->mb.b_overflow = 1;
#endif
            }
        }
        bs_write( s, i_level_prefix + 1, 1 );
        bs_write( s, i_level_prefix - 3, i_level_code & ((1<<(i_level_prefix-3))-1) );
    }
    /* Suffix length adaptation: at least 1 after the first escape, and one
     * more when the magnitude exceeds the threshold for the current length. */
    if( i_suffix_length == 0 )
        i_suffix_length++;
    if( abs_level > next_suffix[i_suffix_length] )
        i_suffix_length++;
    return i_suffix_length;
}
|
||||
|
||||
/* Write the CAVLC residual of one block that has at least one coefficient
 * (the x264_cavlc_block_residual macro only calls this when the cached
 * non-zero count is non-zero).
 *
 *   h              encoder state; output goes to h->out.bs
 *   ctx_block_cat  block category; selects the coeff_level_run function and
 *                  the count_cat limit
 *   l              coefficients of the block
 *   nC             first index into x264_coeff_token
 *
 * Written in order: coefficient token (total / trailing count), trailing
 * sign bits, the remaining levels, total_zeros (when the block is not full)
 * and the packed run_before code.  Returns the number of coefficients
 * reported by coeff_level_run. */
static int cavlc_block_residual_internal( x264_t *h, int ctx_block_cat, dctcoef *l, int nC )
{
    bs_t *s = &h->out.bs;
    /* index of the lowest set bit of a 3-bit value, 3 when no bit is set */
    static const uint8_t ctz_index[8] = {3,0,1,0,2,0,1,0};
    /* per-category coefficient count above which total_zeros is omitted */
    static const uint8_t count_cat[14] = {16, 15, 16, 0, 15, 64, 16, 15, 16, 64, 16, 15, 16, 64};
    x264_run_level_t runlevel;
    int i_total, i_trailing, i_total_zero, i_suffix_length;
    unsigned int i_sign;

    /* level and run and total */
    i_total = h->quantf.coeff_level_run[ctx_block_cat]( l, &runlevel );
    x264_prefetch( &x264_run_before[runlevel.mask] );
    i_total_zero = runlevel.last + 1 - i_total;

    /* branchless i_trailing calculation */
    /* Two sentinel levels of 2 follow the real ones so that the test of
     * level[0..2] below stops at the end of short blocks. */
    runlevel.level[i_total+0] = 2;
    runlevel.level[i_total+1] = 2;
    i_trailing = ((((runlevel.level[0]+1) | (1-runlevel.level[0])) >> 31) & 1) // abs(runlevel.level[0])>1
               | ((((runlevel.level[1]+1) | (1-runlevel.level[1])) >> 31) & 2)
               | ((((runlevel.level[2]+1) | (1-runlevel.level[2])) >> 31) & 4);
    i_trailing = ctz_index[i_trailing];
    /* Sign bits of the first three levels, level[0] in the highest position;
     * the shift keeps only the i_trailing leading ones. */
    i_sign = ((runlevel.level[2] >> 31) & 1)
           | ((runlevel.level[1] >> 31) & 2)
           | ((runlevel.level[0] >> 31) & 4);
    i_sign >>= 3-i_trailing;

    /* total/trailing */
    bs_write_vlc( s, x264_coeff_token[nC][i_total-1][i_trailing] );

    i_suffix_length = i_total > 10 && i_trailing < 3;
    bs_write( s, i_trailing, i_sign );

    if( i_trailing < i_total )
    {
        int val = runlevel.level[i_trailing];
        int val_original = runlevel.level[i_trailing]+LEVEL_TABLE_SIZE/2;
        val -= ((val>>31)|1) & -(i_trailing < 3); /* as runlevel.level[i] can't be 1 for the first one if i_trailing < 3 */
        val += LEVEL_TABLE_SIZE/2;

        /* The range check and the next suffix length use the unadjusted
         * level; the written code uses the adjusted one. */
        if( (unsigned)val_original < LEVEL_TABLE_SIZE )
        {
            bs_write_vlc( s, x264_level_token[i_suffix_length][val] );
            i_suffix_length = x264_level_token[i_suffix_length][val_original].i_next;
        }
        else
            i_suffix_length = cavlc_block_residual_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
        for( int i = i_trailing+1; i < i_total; i++ )
        {
            val = runlevel.level[i] + LEVEL_TABLE_SIZE/2;
            if( (unsigned)val < LEVEL_TABLE_SIZE )
            {
                bs_write_vlc( s, x264_level_token[i_suffix_length][val] );
                i_suffix_length = x264_level_token[i_suffix_length][val].i_next;
            }
            else
                i_suffix_length = cavlc_block_residual_escape( h, i_suffix_length, val-LEVEL_TABLE_SIZE/2 );
        }
    }

    if( ctx_block_cat == DCT_CHROMA_DC )
    {
        if( i_total < 8>>CHROMA_V_SHIFT )
        {
            vlc_t total_zeros = CHROMA_FORMAT == CHROMA_420 ? x264_total_zeros_2x2_dc[i_total-1][i_total_zero]
                                                            : x264_total_zeros_2x4_dc[i_total-1][i_total_zero];
            bs_write_vlc( s, total_zeros );
        }
    }
    else if( (uint8_t)i_total < count_cat[ctx_block_cat] )
        bs_write_vlc( s, x264_total_zeros[i_total-1][i_total_zero] );

    /* The table entry packs both bs_write arguments: the low 5 bits are
     * passed as the second argument, the remaining bits as the third. */
    int zero_run_code = x264_run_before[runlevel.mask];
    bs_write( s, zero_run_code&0x1f, zero_run_code>>5 );

    return i_total;
}
|
||||
|
||||
/* Maps the value returned by x264_mb_predict_non_zero_code() (0..16) to the
 * nC table selector 0..3 used by the x264_cavlc_block_residual macro. */
static const uint8_t ct_index[17] = {0,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3};
|
||||
|
||||
/* Write the residual of block `idx` of category `cat` with coefficients `l`.
 *
 * nC is 5 - CHROMA_V_SHIFT for chroma DC, otherwise ct_index[] of the
 * predicted non-zero count (for luma DC the prediction index is
 * (idx - LUMA_DC)*16).  When the cached non-zero count of the block is 0 only
 * the "no coefficients" token is written; otherwise the block is coded and
 * the cache entry is replaced by the coefficient count that was written.
 *
 * The expansion is a bare { } block and `cat` / `idx` are used unparenthesised
 * and more than once, so pass simple expressions without side effects. */
#define x264_cavlc_block_residual(h,cat,idx,l)\
{\
    int nC = cat == DCT_CHROMA_DC ? 5 - CHROMA_V_SHIFT\
                                  : ct_index[x264_mb_predict_non_zero_code( h, cat == DCT_LUMA_DC ? (idx - LUMA_DC)*16 : idx )];\
    uint8_t *nnz = &h->mb.cache.non_zero_count[x264_scan8[idx]];\
    if( !*nnz )\
        bs_write_vlc( &h->out.bs, x264_coeff0_token[nC] );\
    else\
        *nnz = cavlc_block_residual_internal(h,cat,l,nC);\
}
|
||||
|
||||
/* Write the quantiser delta of the current macroblock
 * (h->mb.i_qp - h->mb.i_last_qp) with bs_write_se().
 *
 * For an I_16x16 macroblock with no coded luma/chroma and no DC coefficients,
 * a positive delta is suppressed (0 is written); outside RDO_SKIP_BS builds
 * h->mb.i_qp is also reset to h->mb.i_last_qp.  A non-zero delta is wrapped
 * by QP_MAX_SPEC+1 so that it lies in
 * [-(QP_MAX_SPEC+1)/2, QP_MAX_SPEC/2]. */
static void cavlc_qp_delta( x264_t *h )
{
    bs_t *s = &h->out.bs;
    int i_dqp = h->mb.i_qp - h->mb.i_last_qp;

    /* Avoid writing a delta quant if we have an empty i16x16 block, e.g. in a completely
     * flat background area. Don't do this if it would raise the quantizer, since that could
     * cause unexpected deblocking artifacts. */
    if( h->mb.i_type == I_16x16 && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma)
        && !h->mb.cache.non_zero_count[x264_scan8[LUMA_DC]]
        && !h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+0]]
        && !h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+1]]
        && h->mb.i_qp > h->mb.i_last_qp )
    {
#if !RDO_SKIP_BS
        h->mb.i_qp = h->mb.i_last_qp;
#endif
        i_dqp = 0;
    }

    if( i_dqp )
    {
        if( i_dqp < -(QP_MAX_SPEC+1)/2 )
            i_dqp += QP_MAX_SPEC+1;
        else if( i_dqp > QP_MAX_SPEC/2 )
            i_dqp -= QP_MAX_SPEC+1;
    }
    bs_write_se( s, i_dqp );
}
|
||||
|
||||
/* Write the motion vector difference of one partition.
 *
 *   i_list  reference list (first index into h->mb.cache.mv)
 *   idx     block index of the partition; mapped through x264_scan8[]
 *   width   partition width passed to x264_mb_predict_mv()
 *
 * The predictor is obtained from x264_mb_predict_mv() and the x then y
 * differences between the cached vector and the predictor are written with
 * bs_write_se(). */
static void cavlc_mvd( x264_t *h, int i_list, int idx, int width )
{
    bs_t *s = &h->out.bs;
    ALIGNED_4( int16_t mvp[2] );
    x264_mb_predict_mv( h, i_list, idx, width, mvp );
    bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][0] - mvp[0] );
    bs_write_se( s, h->mb.cache.mv[i_list][x264_scan8[idx]][1] - mvp[1] );
}
|
||||
|
||||
/* Write the list-0 motion vector differences of 8x8 partition `i` (0..3),
 * one per sub-block according to h->mb.i_sub_partition[i].  Block indices
 * are 4*i plus the offset of each sub-block; the last argument of cavlc_mvd()
 * is the sub-block width.  Sub-partition values other than the four handled
 * here write nothing. */
static inline void cavlc_8x8_mvd( x264_t *h, int i )
{
    switch( h->mb.i_sub_partition[i] )
    {
        case D_L0_8x8:
            cavlc_mvd( h, 0, 4*i, 2 );
            break;
        case D_L0_8x4:
            cavlc_mvd( h, 0, 4*i+0, 2 );
            cavlc_mvd( h, 0, 4*i+2, 2 );
            break;
        case D_L0_4x8:
            cavlc_mvd( h, 0, 4*i+0, 1 );
            cavlc_mvd( h, 0, 4*i+1, 1 );
            break;
        case D_L0_4x4:
            cavlc_mvd( h, 0, 4*i+0, 1 );
            cavlc_mvd( h, 0, 4*i+1, 1 );
            cavlc_mvd( h, 0, 4*i+2, 1 );
            cavlc_mvd( h, 0, 4*i+3, 1 );
            break;
    }
}
|
||||
|
||||
/* Write the 4x4 luma residual blocks of the current macroblock for
 * `plane_count` planes.
 *
 * With the 8x8 transform, every 8x8 block whose first cached non-zero count
 * is set is first split into four 4x4 coefficient lists through
 * h->zigzagf.interleave_8x8_cavlc().  Afterwards, for each plane and each
 * 8x8 block selected by FOREACH_BIT over h->mb.i_cbp_luma, its four 4x4
 * blocks are written. */
static ALWAYS_INLINE void cavlc_macroblock_luma_residual( x264_t *h, int plane_count )
{
    if( h->mb.b_transform_8x8 )
    {
        /* shuffle 8x8 dct coeffs into 4x4 lists */
        for( int p = 0; p < plane_count; p++ )
            for( int i8 = 0; i8 < 4; i8++ )
                if( h->mb.cache.non_zero_count[x264_scan8[p*16+i8*4]] )
                    h->zigzagf.interleave_8x8_cavlc( h->dct.luma4x4[p*16+i8*4], h->dct.luma8x8[p*4+i8],
                                                     &h->mb.cache.non_zero_count[x264_scan8[p*16+i8*4]] );
    }

    for( int p = 0; p < plane_count; p++ )
        FOREACH_BIT( i8, 0, h->mb.i_cbp_luma )
            for( int i4 = 0; i4 < 4; i4++ )
                x264_cavlc_block_residual( h, DCT_LUMA_4x4, i4+i8*4+p*16, h->dct.luma4x4[i4+i8*4+p*16] );
}
|
||||
|
||||
#if RDO_SKIP_BS
/* Single-partition variant of cavlc_macroblock_luma_residual(), compiled
 * only when RDO_SKIP_BS is non-zero: handles 8x8 block `i8` of plane `p`.
 * With the 8x8 transform and a non-zero cached count the coefficients are
 * first split into 4x4 lists; the four 4x4 blocks are written only when bit
 * i8 of h->mb.i_cbp_luma is set. */
static ALWAYS_INLINE void cavlc_partition_luma_residual( x264_t *h, int i8, int p )
{
    if( h->mb.b_transform_8x8 && h->mb.cache.non_zero_count[x264_scan8[i8*4+p*16]] )
        h->zigzagf.interleave_8x8_cavlc( h->dct.luma4x4[i8*4+p*16], h->dct.luma8x8[i8+p*4],
                                         &h->mb.cache.non_zero_count[x264_scan8[i8*4+p*16]] );

    if( h->mb.i_cbp_luma & (1 << i8) )
        for( int i4 = 0; i4 < 4; i4++ )
            x264_cavlc_block_residual( h, DCT_LUMA_4x4, i4+i8*4+p*16, h->dct.luma4x4[i4+i8*4+p*16] );
}
#endif
|
||||
|
||||
/* Write the header of an intra macroblock.
 *
 *   i_mb_type      macroblock type; I_16x16 takes the first branch, every
 *                  other value is treated as I_4x4 / I_8x8
 *   i_mb_i_offset  value added to the mb type code number (callers in this
 *                  file pass 0, 5 and 23 for I, P and B slices)
 *   chroma         non-zero to also write the chroma prediction mode
 *
 * I_16x16: a single ue code combining the offset, the 16x16 prediction mode,
 * the chroma cbp (x4) and 12 when any luma is coded.
 * Otherwise: the bare offset, the transform-size flag when the PPS enables
 * the 8x8 transform, then one prediction-mode entry per block (every 4th
 * index for I_8x8). */
static void cavlc_mb_header_i( x264_t *h, int i_mb_type, int i_mb_i_offset, int chroma )
{
    bs_t *s = &h->out.bs;
    if( i_mb_type == I_16x16 )
    {
        bs_write_ue( s, i_mb_i_offset + 1 + x264_mb_pred_mode16x16_fix[h->mb.i_intra16x16_pred_mode] +
                        h->mb.i_cbp_chroma * 4 + ( h->mb.i_cbp_luma == 0 ? 0 : 12 ) );
    }
    else //if( i_mb_type == I_4x4 || i_mb_type == I_8x8 )
    {
        int di = i_mb_type == I_8x8 ? 4 : 1;
        bs_write_ue( s, i_mb_i_offset + 0 );
        if( h->pps->b_transform_8x8_mode )
            bs_write1( s, h->mb.b_transform_8x8 );

        /* Prediction: Luma */
        for( int i = 0; i < 16; i += di )
        {
            int i_pred = x264_mb_predict_intra4x4_mode( h, i );
            int i_mode = x264_mb_pred_mode4x4_fix( h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] );

            if( i_pred == i_mode )
                bs_write1( s, 1 );  /* b_prev_intra4x4_pred_mode */
            else
                /* 4 bits: the mode, reduced by one when it is above the
                 * predicted mode (the predicted value itself is never coded here) */
                bs_write( s, 4, i_mode - (i_mode > i_pred) );
        }

    }
    if( chroma )
        bs_write_ue( s, x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode] );
}
|
||||
|
||||
/* Write the macroblock header for a macroblock in a P slice.
 *
 *   i_mb_type  P_L0, P_8x8, or anything else (handled as intra with a type
 *              offset of 5)
 *   chroma     forwarded to cavlc_mb_header_i() for intra macroblocks
 *
 * P_L0: the partition shape (16x16 as a single 1 bit, 16x8 as ue 1, 8x16 as
 * ue 2), the reference indices when more than one list-0 reference exists,
 * then one mvd per partition.
 * P_8x8: ue 4 when all four cached references are 0 (no reference indices
 * follow), ue 3 otherwise; the four sub-partition types; the four reference
 * indices if needed; then the mvds of each 8x8 partition. */
static ALWAYS_INLINE void cavlc_mb_header_p( x264_t *h, int i_mb_type, int chroma )
{
    bs_t *s = &h->out.bs;
    if( i_mb_type == P_L0 )
    {
        if( h->mb.i_partition == D_16x16 )
        {
            bs_write1( s, 1 );

            if( h->mb.pic.i_fref[0] > 1 )
                bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
            cavlc_mvd( h, 0, 0, 4 );
        }
        else if( h->mb.i_partition == D_16x8 )
        {
            bs_write_ue( s, 1 );
            if( h->mb.pic.i_fref[0] > 1 )
            {
                bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
                bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
            }
            cavlc_mvd( h, 0, 0, 4 );
            cavlc_mvd( h, 0, 8, 4 );
        }
        else if( h->mb.i_partition == D_8x16 )
        {
            bs_write_ue( s, 2 );
            if( h->mb.pic.i_fref[0] > 1 )
            {
                bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
                bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
            }
            cavlc_mvd( h, 0, 0, 2 );
            cavlc_mvd( h, 0, 4, 2 );
        }
    }
    else if( i_mb_type == P_8x8 )
    {
        int b_sub_ref;
        if( (h->mb.cache.ref[0][x264_scan8[0]] | h->mb.cache.ref[0][x264_scan8[ 4]] |
             h->mb.cache.ref[0][x264_scan8[8]] | h->mb.cache.ref[0][x264_scan8[12]]) == 0 )
        {
            bs_write_ue( s, 4 );
            b_sub_ref = 0;
        }
        else
        {
            bs_write_ue( s, 3 );
            b_sub_ref = 1;
        }

        /* sub mb type */
        if( h->param.analyse.inter & X264_ANALYSE_PSUB8x8 )
            for( int i = 0; i < 4; i++ )
                bs_write_ue( s, subpartition_p_to_golomb[ h->mb.i_sub_partition[i] ] );
        else
            /* Sub-8x8 analysis disabled: four 1 bits are written in one call
             * (presumably four ue codes of value 0 -- confirm against
             * bs_write_ue). */
            bs_write( s, 4, 0xf );

        /* ref0 */
        if( b_sub_ref )
        {
            bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[0]] );
            bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[4]] );
            bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[8]] );
            bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[12]] );
        }

        for( int i = 0; i < 4; i++ )
            cavlc_8x8_mvd( h, i );
    }
    else //if( IS_INTRA( i_mb_type ) )
        cavlc_mb_header_i( h, i_mb_type, 5, chroma );
}
|
||||
|
||||
/* Write the macroblock header for a macroblock in a B slice.
 *
 *   i_mb_type  B_8x8, a type in [B_L0_L0, B_BI_BI], B_DIRECT, or anything
 *              else (handled as intra with a type offset of 23)
 *   chroma     forwarded to cavlc_mb_header_i() for intra macroblocks
 *
 * B_8x8: ue 22, the four sub-partition types, then per list the reference
 * indices (only when that list has more than one reference) and finally per
 * list the mvds, each restricted to the partitions that use the list
 * according to x264_mb_partition_listX_table.
 * B_L0_L0..B_BI_BI: the type code from mb_type_b_to_golomb, then reference
 * indices and mvds for every (list, partition) pair enabled in
 * x264_mb_type_list_table[i_mb_type].
 * B_DIRECT: a single 1 bit. */
static ALWAYS_INLINE void cavlc_mb_header_b( x264_t *h, int i_mb_type, int chroma )
{
    bs_t *s = &h->out.bs;
    if( i_mb_type == B_8x8 )
    {
        bs_write_ue( s, 22 );

        /* sub mb type */
        for( int i = 0; i < 4; i++ )
            bs_write_ue( s, subpartition_b_to_golomb[ h->mb.i_sub_partition[i] ] );

        /* ref */
        if( h->mb.pic.i_fref[0] > 1 )
            for( int i = 0; i < 4; i++ )
                if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
                    bs_write_te( s, h->mb.pic.i_fref[0] - 1, h->mb.cache.ref[0][x264_scan8[i*4]] );
        if( h->mb.pic.i_fref[1] > 1 )
            for( int i = 0; i < 4; i++ )
                if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
                    bs_write_te( s, h->mb.pic.i_fref[1] - 1, h->mb.cache.ref[1][x264_scan8[i*4]] );

        /* mvd */
        for( int i = 0; i < 4; i++ )
            if( x264_mb_partition_listX_table[0][ h->mb.i_sub_partition[i] ] )
                cavlc_mvd( h, 0, 4*i, 2 );
        for( int i = 0; i < 4; i++ )
            if( x264_mb_partition_listX_table[1][ h->mb.i_sub_partition[i] ] )
                cavlc_mvd( h, 1, 4*i, 2 );
    }
    else if( i_mb_type >= B_L0_L0 && i_mb_type <= B_BI_BI )
    {
        /* All B mode */
        /* Motion Vector */
        /* b_list[list][partition] is non-zero when that partition uses that list */
        const uint8_t (*b_list)[2] = x264_mb_type_list_table[i_mb_type];
        const int i_ref0_max = h->mb.pic.i_fref[0] - 1;
        const int i_ref1_max = h->mb.pic.i_fref[1] - 1;

        bs_write_ue( s, mb_type_b_to_golomb[ h->mb.i_partition - D_16x8 ][ i_mb_type - B_L0_L0 ] );
        if( h->mb.i_partition == D_16x16 )
        {
            if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[0]] );
            if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[0]] );
            if( b_list[0][0] ) cavlc_mvd( h, 0, 0, 4 );
            if( b_list[1][0] ) cavlc_mvd( h, 1, 0, 4 );
        }
        else
        {
            /* The second partition's reference is read at block 12, which is
             * used for both the 16x8 and the 8x16 shape. */
            if( i_ref0_max && b_list[0][0] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[ 0]] );
            if( i_ref0_max && b_list[0][1] ) bs_write_te( s, i_ref0_max, h->mb.cache.ref[0][x264_scan8[12]] );
            if( i_ref1_max && b_list[1][0] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[ 0]] );
            if( i_ref1_max && b_list[1][1] ) bs_write_te( s, i_ref1_max, h->mb.cache.ref[1][x264_scan8[12]] );
            if( h->mb.i_partition == D_16x8 )
            {
                if( b_list[0][0] ) cavlc_mvd( h, 0, 0, 4 );
                if( b_list[0][1] ) cavlc_mvd( h, 0, 8, 4 );
                if( b_list[1][0] ) cavlc_mvd( h, 1, 0, 4 );
                if( b_list[1][1] ) cavlc_mvd( h, 1, 8, 4 );
            }
            else //if( h->mb.i_partition == D_8x16 )
            {
                if( b_list[0][0] ) cavlc_mvd( h, 0, 0, 2 );
                if( b_list[0][1] ) cavlc_mvd( h, 0, 4, 2 );
                if( b_list[1][0] ) cavlc_mvd( h, 1, 0, 2 );
                if( b_list[1][1] ) cavlc_mvd( h, 1, 4, 2 );
            }
        }
    }
    else if( i_mb_type == B_DIRECT )
        bs_write1( s, 1 );
    else //if( IS_INTRA( i_mb_type ) )
        cavlc_mb_header_i( h, i_mb_type, 23, chroma );
}
|
||||
|
||||
/*****************************************************************************
|
||||
* x264_macroblock_write:
|
||||
*****************************************************************************/
|
||||
/*
 * Emit the current macroblock (h->mb) into h->out.bs using CAVLC syntax.
 *
 * Sequence, as coded below: optional MBAFF field flag, slice-type specific
 * macroblock header, coded block pattern, transform-8x8 flag, QP delta and
 * luma residual, then chroma DC/AC residual.
 *
 * With RDO_SKIP_BS set, s->i_bits_encoded is reset to 0 on entry and the
 * statistics and I_PCM paths are compiled out.  Otherwise the bits written up
 * to the end of the header are added to h->stat.frame.i_mv_bits and the bits
 * written after it to h->stat.frame.i_tex_bits.
 */
void x264_macroblock_write_cavlc( x264_t *h )
{
    bs_t *s = &h->out.bs;
    const int i_mb_type = h->mb.i_type;
    const int n_planes = CHROMA444 ? 3 : 1;
    /* 1 when the chroma format is 4:2:0 or 4:2:2, 0 otherwise. */
    const int b_chroma_planes = CHROMA_FORMAT == CHROMA_420 || CHROMA_FORMAT == CHROMA_422;

#if RDO_SKIP_BS
    s->i_bits_encoded = 0;
#else
    const int i_mb_pos_start = bs_pos( s );
    int i_mb_pos_tex;
#endif

    /* Field flag: written on even macroblock rows, or on odd rows when the
     * macroblock one stride above has a skip type. */
    if( SLICE_MBAFF )
    {
        if( !(h->mb.i_mb_y & 1) || IS_SKIP( h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride] ) )
        {
            bs_write1( s, MB_INTERLACED );
#if !RDO_SKIP_BS
            h->mb.field_decoding_flag = MB_INTERLACED;
#endif
        }
    }

#if !RDO_SKIP_BS
    if( i_mb_type == I_PCM )
    {
        /* Added to 25 to form the mb_type code; indexed by h->sh.i_type. */
        static const uint8_t pcm_type_base[3] = { 5, 23, 0 };
        uint8_t *saved_start = s->p_start;
        const int chroma_rows = 16 >> CHROMA_V_SHIFT;

        bs_write_ue( s, pcm_type_base[h->sh.i_type] + 25 );
        i_mb_pos_tex = bs_pos( s );
        h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;

        bs_align_0( s );

        /* Raw samples: 256 per full-size plane, read linearly from p_fenc. */
        for( int p = 0; p < n_planes; p++ )
            for( int i = 0; i < 256; i++ )
                bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[p][i] );

        /* Then both chroma planes, 8 samples per row. */
        if( b_chroma_planes )
        {
            for( int ch = 1; ch < 3; ch++ )
                for( int y = 0; y < chroma_rows; y++ )
                    for( int x = 0; x < 8; x++ )
                        bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[ch][y*FENC_STRIDE+x] );
        }

        /* Re-initialise the writer at the current position, then put the
         * original start pointer back. */
        bs_init( s, s->p, s->p_end - s->p );
        s->p_start = saved_start;

        h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
        return;
    }
#endif

    /* Macroblock header, selected by slice type. */
    if( h->sh.i_type == SLICE_TYPE_P )
        cavlc_mb_header_p( h, i_mb_type, b_chroma_planes );
    else if( h->sh.i_type == SLICE_TYPE_B )
        cavlc_mb_header_b( h, i_mb_type, b_chroma_planes );
    else //if( h->sh.i_type == SLICE_TYPE_I )
        cavlc_mb_header_i( h, i_mb_type, 0, b_chroma_planes );

#if !RDO_SKIP_BS
    i_mb_pos_tex = bs_pos( s );
    h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start;
#endif

    /* Coded block pattern (not written separately for I_16x16) */
    if( i_mb_type != I_16x16 )
    {
        const int cbp = (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
        bs_write_ue( s, cbp_to_golomb[b_chroma_planes][IS_INTRA(i_mb_type)][cbp] );
    }

    /* transform size 8x8 flag */
    if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma )
        bs_write1( s, h->mb.b_transform_8x8 );

    if( i_mb_type == I_16x16 )
    {
        cavlc_qp_delta( h );

        for( int p = 0; p < n_planes; p++ )
        {
            /* DC Luma */
            x264_cavlc_block_residual( h, DCT_LUMA_DC, LUMA_DC+p, h->dct.luma16x16_dc[p] );

            /* AC Luma: the 16 blocks belonging to plane p */
            if( h->mb.i_cbp_luma )
            {
                const int first = p*16;
                for( int blk = first; blk < first+16; blk++ )
                    x264_cavlc_block_residual( h, DCT_LUMA_AC, blk, h->dct.luma4x4[blk]+1 );
            }
        }
    }
    else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma )
    {
        cavlc_qp_delta( h );
        cavlc_macroblock_luma_residual( h, n_planes );
    }

    if( h->mb.i_cbp_chroma )
    {
        /* Chroma DC residual present */
        x264_cavlc_block_residual( h, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0] );
        x264_cavlc_block_residual( h, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1] );

        if( h->mb.i_cbp_chroma == 2 ) /* Chroma AC residual present */
        {
            /* Groups of four consecutive block indices, starting at 16 and
             * spaced 8 << CHROMA_V_SHIFT apart, up to (not including) 48. */
            const int stride = 8 << CHROMA_V_SHIFT;
            for( int base = 16; base < 3*16; base += stride )
                for( int blk = base; blk < base+4; blk++ )
                    x264_cavlc_block_residual( h, DCT_CHROMA_AC, blk, h->dct.luma4x4[blk]+1 );
        }
    }

#if !RDO_SKIP_BS
    h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex;
#endif
}
|
||||
|
||||
#if RDO_SKIP_BS
|
||||
/*****************************************************************************
 * RD only; doesn't generate a valid bitstream
 * doesn't write cbp or chroma dc (I don't know how much this matters)
 * doesn't write ref (never varies between calls, so no point in doing so)
 * only writes subpartition for p8x8, needed for sub-8x8 mode decision RDO
 * works on all partition sizes except 16x16
 *****************************************************************************/
|
||||
/*
 * Size, in the units accumulated in h->out.bs.i_bits_encoded, of the motion
 * data and residual of one partition starting at 8x8 block i8.
 *
 * i_pixel is a PIXEL_WxH index; values below PIXEL_8x8 make the residual part
 * run twice, advancing i8 by (partition height / 8) after each pass.
 */
static int partition_size_cavlc( x264_t *h, int i8, int i_pixel )
{
    bs_t *s = &h->out.bs;
    const int i_mb_type = h->mb.i_type;
    const int n_planes = CHROMA444 ? 3 : 1;
    /* Width handed to cavlc_mvd(): 4, halved to 2 for an 8x16 split. */
    const int mvd_width = 4 >> (h->mb.i_partition == D_8x16);

    s->i_bits_encoded = 0;

    /* Motion vector differences (plus the sub-partition type for P_8x8). */
    if( i_mb_type == P_8x8 )
    {
        cavlc_8x8_mvd( h, i8 );
        bs_write_ue( s, subpartition_p_to_golomb[ h->mb.i_sub_partition[i8] ] );
    }
    else if( i_mb_type == P_L0 )
        cavlc_mvd( h, 0, 4*i8, mvd_width );
    else if( i_mb_type > B_DIRECT && i_mb_type < B_8x8 )
    {
        for( int i_list = 0; i_list < 2; i_list++ )
            if( x264_mb_type_list_table[ i_mb_type ][i_list][!!i8] )
                cavlc_mvd( h, i_list, 4*i8, mvd_width );
    }
    else //if( i_mb_type == B_8x8 )
    {
        for( int i_list = 0; i_list < 2; i_list++ )
            if( x264_mb_partition_listX_table[i_list][ h->mb.i_sub_partition[i8] ] )
                cavlc_mvd( h, i_list, 4*i8, 2 );
    }

    /* Residual: two passes when i_pixel < PIXEL_8x8, one otherwise. */
    for( int pass = (i_pixel < PIXEL_8x8) ? 2 : 1; pass > 0; pass-- )
    {
        for( int p = 0; p < n_planes; p++ )
            cavlc_partition_luma_residual( h, i8, p );

        if( h->mb.i_cbp_chroma )
        {
            if( CHROMA_FORMAT == CHROMA_422 )
            {
                /* (5*i8) & 0x09 maps i8 = 0,1,2,3 to 0,1,8,9. */
                const int offset = (5*i8) & 0x09;
                static const uint8_t first_blk[4] = { 16, 18, 32, 34 };
                for( int k = 0; k < 4; k++ )
                {
                    const int blk = first_blk[k] + offset;
                    x264_cavlc_block_residual( h, DCT_CHROMA_AC, blk, h->dct.luma4x4[blk]+1 );
                }
            }
            else
            {
                x264_cavlc_block_residual( h, DCT_CHROMA_AC, 16+i8, h->dct.luma4x4[16+i8]+1 );
                x264_cavlc_block_residual( h, DCT_CHROMA_AC, 32+i8, h->dct.luma4x4[32+i8]+1 );
            }
        }

        i8 += x264_pixel_size[i_pixel].h >> 3;
    }

    return s->i_bits_encoded;
}
|
||||
|
||||
/*
 * Size of one sub-8x8 partition starting at 4x4 block i4: its list-0 mvd plus
 * the luma 4x4 residual of each plane.  For anything other than PIXEL_4x4 a
 * second 4x4 block is counted, at i4+1 for PIXEL_8x4 and at i4+2 otherwise.
 * Returns the value accumulated in h->out.bs.i_bits_encoded.
 */
static int subpartition_size_cavlc( x264_t *h, int i4, int i_pixel )
{
    const int n_planes = CHROMA444 ? 3 : 1;
    const int b_8x4 = i_pixel == PIXEL_8x4;
    const int b_two_blocks = i_pixel != PIXEL_4x4;

    h->out.bs.i_bits_encoded = 0;
    cavlc_mvd( h, 0, i4, 1+b_8x4 );

    for( int p = 0; p < n_planes; p++ )
    {
        const int blk0 = p*16+i4;
        x264_cavlc_block_residual( h, DCT_LUMA_4x4, blk0, h->dct.luma4x4[blk0] );
        if( b_two_blocks )
        {
            const int blk1 = blk0+2-b_8x4;
            x264_cavlc_block_residual( h, DCT_LUMA_4x4, blk1, h->dct.luma4x4[blk1] );
        }
    }

    return h->out.bs.i_bits_encoded;
}
|
||||
|
||||
/*
 * Cost of signalling an intra 4x4 prediction mode for block i4:
 * 1 when the (fixed-up) mode equals the predicted mode, 4 otherwise.
 */
static int cavlc_intra4x4_pred_size( x264_t *h, int i4, int i_mode )
{
    const int b_predicted = x264_mb_predict_intra4x4_mode( h, i4 ) == x264_mb_pred_mode4x4_fix( i_mode );
    return b_predicted ? 1 : 4;
}
|
||||
|
||||
/*
 * Size of one intra 8x8 block i8 coded with prediction mode i_mode:
 * prediction-mode cost, the intra coded-block-pattern code, and the luma
 * residual of every plane.  Returns h->out.bs.i_bits_encoded.
 */
static int partition_i8x8_size_cavlc( x264_t *h, int i8, int i_mode )
{
    bs_t *s = &h->out.bs;
    const int n_planes = CHROMA444 ? 3 : 1;
    const int cbp = (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;

    /* Start the count from the mode cost (this also discards any old total). */
    s->i_bits_encoded = cavlc_intra4x4_pred_size( h, 4*i8, i_mode );
    bs_write_ue( s, cbp_to_golomb[!CHROMA444][1][cbp] );

    for( int p = 0; p < n_planes; p++ )
        cavlc_partition_luma_residual( h, i8, p );

    return s->i_bits_encoded;
}
|
||||
|
||||
/*
 * Size of one intra 4x4 block i4 coded with prediction mode i_mode:
 * prediction-mode cost plus the luma 4x4 residual of every plane.
 * Returns h->out.bs.i_bits_encoded.
 */
static int partition_i4x4_size_cavlc( x264_t *h, int i4, int i_mode )
{
    const int n_planes = CHROMA444 ? 3 : 1;

    /* Start the count from the mode cost (this also discards any old total). */
    h->out.bs.i_bits_encoded = cavlc_intra4x4_pred_size( h, i4, i_mode );

    for( int p = 0; p < n_planes; p++ )
    {
        const int blk = p*16+i4;
        x264_cavlc_block_residual( h, DCT_LUMA_4x4, blk, h->dct.luma4x4[blk] );
    }

    return h->out.bs.i_bits_encoded;
}
|
||||
|
||||
/*
 * Size of the chroma part of the current macroblock: the ue() length of the
 * (fixed-up) chroma prediction mode, plus chroma DC residual when
 * h->mb.i_cbp_chroma is non-zero and chroma AC residual when it equals 2.
 * Returns h->out.bs.i_bits_encoded.
 */
static int chroma_size_cavlc( x264_t *h )
{
    h->out.bs.i_bits_encoded = bs_size_ue( x264_mb_chroma_pred_mode_fix[h->mb.i_chroma_pred_mode] );

    if( !h->mb.i_cbp_chroma )
        return h->out.bs.i_bits_encoded;

    x264_cavlc_block_residual( h, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0] );
    x264_cavlc_block_residual( h, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1] );

    if( h->mb.i_cbp_chroma == 2 )
    {
        /* Groups of four consecutive block indices, starting at 16 and
         * spaced 8 << CHROMA_V_SHIFT apart, up to (not including) 48. */
        const int stride = 8 << CHROMA_V_SHIFT;
        for( int base = 16; base < 3*16; base += stride )
            for( int blk = base; blk < base+4; blk++ )
                x264_cavlc_block_residual( h, DCT_CHROMA_AC, blk, h->dct.luma4x4[blk]+1 );
    }

    return h->out.bs.i_bits_encoded;
}
|
||||
#endif
|
||||
4603
encoder/encoder.c
Normal file
4603
encoder/encoder.c
Normal file
File diff suppressed because it is too large
Load Diff
250
encoder/lookahead.c
Normal file
250
encoder/lookahead.c
Normal file
@@ -0,0 +1,250 @@
|
||||
/*****************************************************************************
|
||||
* lookahead.c: high-level lookahead functions
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2010-2025 Avail Media and x264 project
|
||||
*
|
||||
* Authors: Michael Kazmier <mkazmier@availmedia.com>
|
||||
* Alex Giladi <agiladi@availmedia.com>
|
||||
* Steven Walters <kemuri9@gmail.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
/* LOOKAHEAD (threaded and non-threaded mode)
 *
 * Lookahead types:
 *     [1] Slice type / scene cut;
 *
 * In non-threaded mode, we run the existing slicetype decision code as it was.
 * In threaded mode, we run in a separate thread that lives between the calls
 * to x264_encoder_open() and x264_encoder_close(), and performs lookahead for
 * the number of frames specified in rc_lookahead.  Recommended setting is
 * # of bframes + # of threads.
 */
|
||||
#include "common/common.h"
|
||||
#include "analyse.h"
|
||||
|
||||
/*
 * Move `count` frames from the head of src to the tail of dst, then (if any
 * were requested) signal dst->cv_fill and src->cv_empty.
 * No mutex is taken here; the asserts require that dst has room and src is
 * non-empty for every frame moved.
 */
static void lookahead_shift( x264_sync_frame_list_t *dst, x264_sync_frame_list_t *src, int count )
{
    for( int remaining = count; remaining != 0; remaining-- )
    {
        assert( dst->i_size < dst->i_max_size );
        assert( src->i_size );
        dst->list[ dst->i_size++ ] = x264_frame_shift( src->list );
        src->i_size--;
    }

    if( !count )
        return;

    x264_pthread_cond_broadcast( &dst->cv_fill );
    x264_pthread_cond_broadcast( &src->cv_empty );
}
|
||||
|
||||
/*
 * Replace h->lookahead->last_nonb with new_nonb.  The previous frame, if any,
 * is handed to x264_frame_push_unused(); the new one gets its
 * i_reference_count incremented.
 */
static void lookahead_update_last_nonb( x264_t *h, x264_frame_t *new_nonb )
{
    x264_frame_t *previous = h->lookahead->last_nonb;

    if( previous )
        x264_frame_push_unused( h, previous );

    h->lookahead->last_nonb = new_nonb;
    new_nonb->i_reference_count++;
}
|
||||
|
||||
#if HAVE_THREAD
|
||||
/*
 * Lookahead-thread step: run slice type decision, then move the decided
 * group (next.list[0] and its i_bframes followers) from `next` to `ofbuf`.
 * Blocks on ofbuf.cv_empty while ofbuf is full.  Lock order: ofbuf.mutex is
 * taken first and held across the shift and the optional keyframe analysis;
 * next.mutex is held only around the shift.
 */
static void lookahead_slicetype_decide( x264_t *h )
{
    x264_lookahead_t *look = h->lookahead;

    x264_slicetype_decide( h );

    x264_frame_t *first = look->next.list[0];
    lookahead_update_last_nonb( h, first );
    int shift_frames = first->i_bframes + 1;

    x264_pthread_mutex_lock( &look->ofbuf.mutex );
    while( look->ofbuf.i_size == look->ofbuf.i_max_size )
        x264_pthread_cond_wait( &look->ofbuf.cv_empty, &look->ofbuf.mutex );

    x264_pthread_mutex_lock( &look->next.mutex );
    lookahead_shift( &look->ofbuf, &look->next, shift_frames );
    x264_pthread_mutex_unlock( &look->next.mutex );

    /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. */
    if( look->b_analyse_keyframe && IS_X264_TYPE_I( look->last_nonb->i_type ) )
        x264_slicetype_analyse( h, shift_frames );

    x264_pthread_mutex_unlock( &look->ofbuf.mutex );
}
|
||||
|
||||
/*
 * Entry point of the lookahead thread.
 *
 * Main loop: under ifbuf.mutex, stop if b_exit_thread is set; otherwise pull
 * as many frames from ifbuf into `next` as fit.  If `next` then holds more
 * than i_slicetype_length + b_vfr_input frames, release ifbuf and decide a
 * group; if not, sleep on ifbuf.cv_fill until input arrives or exit is
 * requested.
 *
 * After the loop: move whatever is left in ifbuf to `next`, decide until
 * `next` is empty, clear b_thread_active and wake waiters on ofbuf.cv_fill.
 * Always returns NULL.
 */
REALIGN_STACK static void *lookahead_thread( x264_t *h )
{
    x264_lookahead_t *look = h->lookahead;

    for( ;; )
    {
        x264_pthread_mutex_lock( &look->ifbuf.mutex );
        if( look->b_exit_thread )
        {
            x264_pthread_mutex_unlock( &look->ifbuf.mutex );
            break;
        }

        x264_pthread_mutex_lock( &look->next.mutex );
        int room = look->next.i_max_size - look->next.i_size;
        int shift = X264_MIN( room, look->ifbuf.i_size );
        lookahead_shift( &look->next, &look->ifbuf, shift );
        x264_pthread_mutex_unlock( &look->next.mutex );

        if( look->next.i_size > look->i_slicetype_length + h->param.b_vfr_input )
        {
            /* Enough frames queued: decide without holding the input lock. */
            x264_pthread_mutex_unlock( &look->ifbuf.mutex );
            lookahead_slicetype_decide( h );
        }
        else
        {
            /* Not enough yet: wait for more input (or for the exit request). */
            while( !look->ifbuf.i_size && !look->b_exit_thread )
                x264_pthread_cond_wait( &look->ifbuf.cv_fill, &look->ifbuf.mutex );
            x264_pthread_mutex_unlock( &look->ifbuf.mutex );
        }
    }   /* end of input frames */

    /* Drain: everything still in ifbuf goes to next ... */
    x264_pthread_mutex_lock( &look->ifbuf.mutex );
    x264_pthread_mutex_lock( &look->next.mutex );
    lookahead_shift( &look->next, &look->ifbuf, look->ifbuf.i_size );
    x264_pthread_mutex_unlock( &look->next.mutex );
    x264_pthread_mutex_unlock( &look->ifbuf.mutex );

    /* ... and next is decided until it is empty. */
    while( look->next.i_size )
        lookahead_slicetype_decide( h );

    x264_pthread_mutex_lock( &look->ofbuf.mutex );
    look->b_thread_active = 0;
    x264_pthread_cond_broadcast( &look->ofbuf.cv_fill );
    x264_pthread_mutex_unlock( &look->ofbuf.mutex );

    return NULL;
}
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Allocate the shared lookahead state, attach it to h->thread[0 ..
 * i_threads-1], initialise the three frame lists and, when
 * h->param.i_sync_lookahead is non-zero, set up h->thread[i_threads] as a copy
 * of *h and start lookahead_thread on it.
 *
 * Returns 0 on success, -1 on failure.
 *
 * NOTE(review): the fail path only frees `look`.  Frame lists that were
 * already initialised and the h->thread[i]->lookahead pointers assigned above
 * are left as they are -- confirm the caller's cleanup tolerates that.
 */
int x264_lookahead_init( x264_t *h, int i_slicetype_length )
{
    x264_lookahead_t *look;
    x264_t *look_h;
    int b_vbv_lookahead;

    CHECKED_MALLOCZERO( look, sizeof(x264_lookahead_t) );
    for( int i = 0; i < h->param.i_threads; i++ )
        h->thread[i]->lookahead = look;

    b_vbv_lookahead = h->param.rc.i_vbv_buffer_size && h->param.rc.i_lookahead;

    look->i_last_keyframe = - h->param.i_keyint_max;
    look->b_analyse_keyframe = (h->param.rc.b_mb_tree || b_vbv_lookahead)
                               && !h->param.rc.b_stat_read;
    look->i_slicetype_length = i_slicetype_length;

    /* init frame lists */
    if( x264_sync_frame_list_init( &look->ifbuf, h->param.i_sync_lookahead+3 ) )
        goto fail;
    if( x264_sync_frame_list_init( &look->next, h->frames.i_delay+3 ) )
        goto fail;
    if( x264_sync_frame_list_init( &look->ofbuf, h->frames.i_delay+3 ) )
        goto fail;

    /* Without sync lookahead no thread is started. */
    if( !h->param.i_sync_lookahead )
        return 0;

    /* The slot after the regular threads holds the lookahead's own context. */
    look_h = h->thread[h->param.i_threads];
    *look_h = *h;

    if( x264_macroblock_cache_allocate( look_h ) )
        goto fail;

    if( x264_macroblock_thread_allocate( look_h, 1 ) < 0 )
        goto fail;

    if( x264_pthread_create( &look->thread_handle, NULL, (void*)lookahead_thread, look_h ) )
        goto fail;
    look->b_thread_active = 1;

    return 0;

fail:
    x264_free( look );
    return -1;
}
|
||||
|
||||
/*
 * Tear down the lookahead.  With sync lookahead enabled: request thread exit
 * under ifbuf.mutex, wake it, join it, and free the context stored in
 * h->thread[i_threads].  In all cases: delete the three frame lists, return
 * last_nonb (if set) via x264_frame_push_unused(), and free the lookahead
 * struct itself.
 */
void x264_lookahead_delete( x264_t *h )
{
    x264_lookahead_t *look = h->lookahead;

    if( h->param.i_sync_lookahead )
    {
        x264_t *look_h = h->thread[h->param.i_threads];

        x264_pthread_mutex_lock( &look->ifbuf.mutex );
        look->b_exit_thread = 1;
        x264_pthread_cond_broadcast( &look->ifbuf.cv_fill );
        x264_pthread_mutex_unlock( &look->ifbuf.mutex );

        x264_pthread_join( look->thread_handle, NULL );

        x264_macroblock_cache_free( look_h );
        x264_macroblock_thread_free( look_h, 1 );
        x264_free( look_h );
    }

    x264_sync_frame_list_delete( &look->ifbuf );
    x264_sync_frame_list_delete( &look->next );
    if( look->last_nonb )
        x264_frame_push_unused( h, look->last_nonb );
    x264_sync_frame_list_delete( &look->ofbuf );
    x264_free( look );
}
|
||||
|
||||
/*
 * Queue an input frame for lookahead: into ifbuf when sync lookahead is
 * enabled (h->param.i_sync_lookahead non-zero), straight into `next` when
 * it is not.
 */
void x264_lookahead_put_frame( x264_t *h, x264_frame_t *frame )
{
    x264_sync_frame_list_t *target = h->param.i_sync_lookahead ? &h->lookahead->ifbuf
                                                               : &h->lookahead->next;
    x264_sync_frame_list_push( target, frame );
}
|
||||
|
||||
/*
 * Returns non-zero when both `next` and `ofbuf` hold no frames.  Both sizes
 * are read with ofbuf.mutex and next.mutex held (taken in that order).
 */
int x264_lookahead_is_empty( x264_t *h )
{
    x264_lookahead_t *look = h->lookahead;

    x264_pthread_mutex_lock( &look->ofbuf.mutex );
    x264_pthread_mutex_lock( &look->next.mutex );
    int b_empty = !(look->next.i_size || look->ofbuf.i_size);
    x264_pthread_mutex_unlock( &look->next.mutex );
    x264_pthread_mutex_unlock( &look->ofbuf.mutex );

    return b_empty;
}
|
||||
|
||||
/*
 * Hand one decided group from ofbuf to the encoder: ofbuf.list[0] plus its
 * i_bframes followers are pushed onto h->frames.current, then waiters on
 * ofbuf.cv_empty are woken.  Does nothing when ofbuf is empty.
 * Takes no lock itself.
 */
static void lookahead_encoder_shift( x264_t *h )
{
    x264_lookahead_t *look = h->lookahead;

    if( !look->ofbuf.i_size )
        return;

    for( int remaining = look->ofbuf.list[0]->i_bframes + 1; remaining != 0; remaining-- )
    {
        x264_frame_push( h->frames.current, x264_frame_shift( look->ofbuf.list ) );
        look->ofbuf.i_size--;
    }

    x264_pthread_cond_broadcast( &look->ofbuf.cv_empty );
}
|
||||
|
||||
/*
 * Fetch the next group of frames for encoding into h->frames.current.
 *
 * With a lookahead thread: wait on ofbuf.cv_fill until ofbuf has frames or
 * the thread is no longer active, then shift under ofbuf.mutex.
 * Without one: if h->frames.current is empty and `next` has frames, run the
 * slice type decision inline, move the group to ofbuf and shift it.
 */
void x264_lookahead_get_frames( x264_t *h )
{
    x264_lookahead_t *look = h->lookahead;

    if( h->param.i_sync_lookahead )
    {
        /* We have a lookahead thread, so get frames from there */
        x264_pthread_mutex_lock( &look->ofbuf.mutex );
        while( !look->ofbuf.i_size && look->b_thread_active )
            x264_pthread_cond_wait( &look->ofbuf.cv_fill, &look->ofbuf.mutex );
        lookahead_encoder_shift( h );
        x264_pthread_mutex_unlock( &look->ofbuf.mutex );
        return;
    }

    /* We are not running a lookahead thread, so perform all the slicetype decide on the fly */
    if( h->frames.current[0] || !look->next.i_size )
        return;

    x264_slicetype_decide( h );

    x264_frame_t *first = look->next.list[0];
    lookahead_update_last_nonb( h, first );
    int shift_frames = first->i_bframes + 1;
    lookahead_shift( &look->ofbuf, &look->next, shift_frames );

    /* For MB-tree and VBV lookahead, we have to perform propagation analysis on I-frames too. */
    if( look->b_analyse_keyframe && IS_X264_TYPE_I( look->last_nonb->i_type ) )
        x264_slicetype_analyse( h, shift_frames );

    lookahead_encoder_shift( h );
}
|
||||
1425
encoder/macroblock.c
Normal file
1425
encoder/macroblock.c
Normal file
File diff suppressed because it is too large
Load Diff
215
encoder/macroblock.h
Normal file
215
encoder/macroblock.h
Normal file
@@ -0,0 +1,215 @@
|
||||
/*****************************************************************************
|
||||
* macroblock.h: macroblock encoding
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2003-2025 x264 project
|
||||
*
|
||||
* Authors: Loren Merritt <lorenm@u.washington.edu>
|
||||
* Laurent Aimar <fenrir@via.ecp.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X264_ENCODER_MACROBLOCK_H
|
||||
#define X264_ENCODER_MACROBLOCK_H
|
||||
|
||||
#include "common/macroblock.h"
|
||||
|
||||
#define x264_rdo_init x264_template(rdo_init)
|
||||
void x264_rdo_init( void );
|
||||
|
||||
#define x264_macroblock_probe_skip x264_template(macroblock_probe_skip)
|
||||
int x264_macroblock_probe_skip( x264_t *h, int b_bidir );
|
||||
|
||||
#define x264_macroblock_probe_pskip( h )\
|
||||
x264_macroblock_probe_skip( h, 0 )
|
||||
#define x264_macroblock_probe_bskip( h )\
|
||||
x264_macroblock_probe_skip( h, 1 )
|
||||
|
||||
#define x264_predict_lossless_4x4 x264_template(predict_lossless_4x4)
|
||||
void x264_predict_lossless_4x4( x264_t *h, pixel *p_dst, int p, int idx, int i_mode );
|
||||
#define x264_predict_lossless_8x8 x264_template(predict_lossless_8x8)
|
||||
void x264_predict_lossless_8x8( x264_t *h, pixel *p_dst, int p, int idx, int i_mode, pixel edge[36] );
|
||||
#define x264_predict_lossless_16x16 x264_template(predict_lossless_16x16)
|
||||
void x264_predict_lossless_16x16( x264_t *h, int p, int i_mode );
|
||||
#define x264_predict_lossless_chroma x264_template(predict_lossless_chroma)
|
||||
void x264_predict_lossless_chroma( x264_t *h, int i_mode );
|
||||
|
||||
#define x264_macroblock_encode x264_template(macroblock_encode)
|
||||
void x264_macroblock_encode ( x264_t *h );
|
||||
#define x264_macroblock_write_cabac x264_template(macroblock_write_cabac)
|
||||
void x264_macroblock_write_cabac ( x264_t *h, x264_cabac_t *cb );
|
||||
#define x264_macroblock_write_cavlc x264_template(macroblock_write_cavlc)
|
||||
void x264_macroblock_write_cavlc ( x264_t *h );
|
||||
|
||||
#define x264_macroblock_encode_p8x8 x264_template(macroblock_encode_p8x8)
|
||||
void x264_macroblock_encode_p8x8( x264_t *h, int i8 );
|
||||
#define x264_macroblock_encode_p4x4 x264_template(macroblock_encode_p4x4)
|
||||
void x264_macroblock_encode_p4x4( x264_t *h, int i4 );
|
||||
#define x264_mb_encode_chroma x264_template(mb_encode_chroma)
|
||||
void x264_mb_encode_chroma( x264_t *h, int b_inter, int i_qp );
|
||||
|
||||
#define x264_cabac_mb_skip x264_template(cabac_mb_skip)
|
||||
void x264_cabac_mb_skip( x264_t *h, int b_skip );
|
||||
#define x264_cabac_block_residual_c x264_template(cabac_block_residual_c)
|
||||
void x264_cabac_block_residual_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l );
|
||||
#define x264_cabac_block_residual_8x8_rd_c x264_template(cabac_block_residual_8x8_rd_c)
|
||||
void x264_cabac_block_residual_8x8_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l );
|
||||
#define x264_cabac_block_residual_rd_c x264_template(cabac_block_residual_rd_c)
|
||||
void x264_cabac_block_residual_rd_c( x264_t *h, x264_cabac_t *cb, int ctx_block_cat, dctcoef *l );
|
||||
|
||||
#define x264_quant_luma_dc_trellis x264_template(quant_luma_dc_trellis)
|
||||
int x264_quant_luma_dc_trellis( x264_t *h, dctcoef *dct, int i_quant_cat, int i_qp,
|
||||
int ctx_block_cat, int b_intra, int idx );
|
||||
#define x264_quant_chroma_dc_trellis x264_template(quant_chroma_dc_trellis)
|
||||
int x264_quant_chroma_dc_trellis( x264_t *h, dctcoef *dct, int i_qp, int b_intra, int idx );
|
||||
#define x264_quant_4x4_trellis x264_template(quant_4x4_trellis)
|
||||
int x264_quant_4x4_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
|
||||
int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx );
|
||||
#define x264_quant_8x8_trellis x264_template(quant_8x8_trellis)
|
||||
int x264_quant_8x8_trellis( x264_t *h, dctcoef *dct, int i_quant_cat,
|
||||
int i_qp, int ctx_block_cat, int b_intra, int b_chroma, int idx );
|
||||
|
||||
#define x264_noise_reduction_update x264_template(noise_reduction_update)
|
||||
void x264_noise_reduction_update( x264_t *h );
|
||||
|
||||
/*
 * Quantize one 4x4 block of coefficients in place and return the quantizer's
 * result (callers in this header store it as the block's non-zero flag).
 *
 * p selects the matrix family: 0 uses the "Y" tables, anything else the "C"
 * tables.  When h->mb.b_noise_reduction is set the block is denoised first;
 * when h->mb.b_trellis is set the trellis quantizer is used, with the block
 * index offset by p*16.
 */
static ALWAYS_INLINE int x264_quant_4x4( x264_t *h, dctcoef dct[16], int i_qp, int ctx_block_cat, int b_intra, int p, int idx )
{
    const int b_chroma = !!p;
    int i_quant_cat;

    if( b_intra )
        i_quant_cat = b_chroma ? CQM_4IC : CQM_4IY;
    else
        i_quant_cat = b_chroma ? CQM_4PC : CQM_4PY;

    /* 4x4 noise-reduction tables live at index 0 (p == 0) and 2 (p != 0). */
    if( h->mb.b_noise_reduction )
        h->quantf.denoise_dct( dct, h->nr_residual_sum[b_chroma*2], h->nr_offset[b_chroma*2], 16 );

    if( h->mb.b_trellis )
        return x264_quant_4x4_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, b_chroma, idx+p*16 );

    return h->quantf.quant_4x4( dct, h->quant4_mf[i_quant_cat][i_qp], h->quant4_bias[i_quant_cat][i_qp] );
}
|
||||
|
||||
/*
 * Quantize one 8x8 block of coefficients in place and return the quantizer's
 * result (callers in this header use it as the block's non-zero flag).
 *
 * p selects the matrix family: 0 uses the "Y" tables, anything else the "C"
 * tables.  When h->mb.b_noise_reduction is set the block is denoised first;
 * when h->mb.b_trellis is set the trellis quantizer is used, with the block
 * index offset by p*4.
 */
static ALWAYS_INLINE int x264_quant_8x8( x264_t *h, dctcoef dct[64], int i_qp, int ctx_block_cat, int b_intra, int p, int idx )
{
    const int b_chroma = !!p;
    int i_quant_cat;

    if( b_intra )
        i_quant_cat = b_chroma ? CQM_8IC : CQM_8IY;
    else
        i_quant_cat = b_chroma ? CQM_8PC : CQM_8PY;

    /* 8x8 noise-reduction tables live at index 1 (p == 0) and 3 (p != 0). */
    if( h->mb.b_noise_reduction )
        h->quantf.denoise_dct( dct, h->nr_residual_sum[1+b_chroma*2], h->nr_offset[1+b_chroma*2], 64 );

    if( h->mb.b_trellis )
        return x264_quant_8x8_trellis( h, dct, i_quant_cat, i_qp, ctx_block_cat, b_intra, b_chroma, idx+p*4 );

    return h->quantf.quant_8x8( dct, h->quant8_mf[i_quant_cat][i_qp], h->quant8_bias[i_quant_cat][i_qp] );
}
|
||||
|
||||
/* Store the flag nz (expected to be 0 or 1) into the four non_zero_count
 * cache entries of 8x8 block idx in plane p: two adjacent bytes at the
 * block's first scan8 position and two more 8 entries further on.
 * Uses the identifier `h` from the expansion site.
 * Every argument is parenthesized so that expressions such as `a+b` index
 * the intended block; each of p and idx is expanded twice. */
#define STORE_8x8_NNZ( p, idx, nz )\
do\
{\
    M16( &h->mb.cache.non_zero_count[x264_scan8[(p)*16+(idx)*4]+0] ) = (nz) * 0x0101;\
    M16( &h->mb.cache.non_zero_count[x264_scan8[(p)*16+(idx)*4]+8] ) = (nz) * 0x0101;\
} while( 0 )
|
||||
|
||||
/* Zero the non_zero_count cache entries of plane p: four 32-bit stores, one
 * per row, rows being 8 entries apart and starting at the plane's first
 * scan8 position.  Uses the identifier `h` from the expansion site.
 * The argument is parenthesized so that expressions such as `a+b` select the
 * intended plane; it is expanded four times. */
#define CLEAR_16x16_NNZ( p ) \
do\
{\
    M32( &h->mb.cache.non_zero_count[x264_scan8[16*(p)] + 0*8] ) = 0;\
    M32( &h->mb.cache.non_zero_count[x264_scan8[16*(p)] + 1*8] ) = 0;\
    M32( &h->mb.cache.non_zero_count[x264_scan8[16*(p)] + 2*8] ) = 0;\
    M32( &h->mb.cache.non_zero_count[x264_scan8[16*(p)] + 3*8] ) = 0;\
} while( 0 )
|
||||
|
||||
/* A special for loop that iterates branchlessly over each set bit in a
 * 4-bit input.  On every pass x264_ctz_4bit(msk) (presumably the number of
 * trailing zero bits) is added to idx and shifted out of msk together with
 * the set bit itself, so the body sees idx == start + position of that bit.
 * idx, msk and skip are declared in the for-init and are local to the loop. */
#define FOREACH_BIT(idx,start,mask)\
    for( int idx = start, msk = mask, skip;\
         msk && (skip = x264_ctz_4bit(msk), idx += skip, msk >>= skip+1, 1);\
         idx++ )
|
||||
|
||||
/*
 * Encode one intra 4x4 block (index idx) of plane p.
 *
 * Optionally predicts into the reconstruction buffer first (b_predict), then
 * either stores the lossless residual directly, or runs DCT + quantization
 * and, when any coefficient survives, zigzag-scans it into h->dct.luma4x4 and
 * reconstructs the block (dequant + inverse DCT).  The block's
 * non_zero_count cache entry and h->mb.i_cbp_luma are updated along the way.
 */
static ALWAYS_INLINE void x264_mb_encode_i4x4( x264_t *h, int p, int idx, int i_qp, int i_mode, int b_predict )
{
    int nz;
    const int blk = p*16+idx;
    pixel *p_src = &h->mb.pic.p_fenc[p][block_idx_xy_fenc[idx]];
    pixel *p_dst = &h->mb.pic.p_fdec[p][block_idx_xy_fdec[idx]];
    ALIGNED_ARRAY_64( dctcoef, dct4x4,[16] );

    if( b_predict )
    {
        if( h->mb.b_lossless )
            x264_predict_lossless_4x4( h, p_dst, p, idx, i_mode );
        else
            h->predict_4x4[i_mode]( p_dst );
    }

    if( h->mb.b_lossless )
    {
        /* Residual goes straight into the coefficient array; no transform. */
        nz = h->zigzagf.sub_4x4( h->dct.luma4x4[blk], p_src, p_dst );
        h->mb.cache.non_zero_count[x264_scan8[blk]] = nz;
        h->mb.i_cbp_luma |= nz<<(idx>>2);
        return;
    }

    h->dctf.sub4x4_dct( dct4x4, p_src, p_dst );

    nz = x264_quant_4x4( h, dct4x4, i_qp, ctx_cat_plane[DCT_LUMA_4x4][p], 1, p, idx );
    h->mb.cache.non_zero_count[x264_scan8[blk]] = nz;
    if( !nz )
        return;

    /* One cbp bit per group of four 4x4 blocks. */
    h->mb.i_cbp_luma |= 1<<(idx>>2);
    h->zigzagf.scan_4x4( h->dct.luma4x4[blk], dct4x4 );
    h->quantf.dequant_4x4( dct4x4, h->dequant4_mf[p?CQM_4IC:CQM_4IY], i_qp );
    h->dctf.add4x4_idct( p_dst, dct4x4 );
}
|
||||
|
||||
/*
 * Encode one intra 8x8 block (index idx, 0..3 in raster order as derived
 * from idx&1 / idx>>1) of plane p.
 *
 * With b_predict set, the block is predicted first; if the caller passes no
 * edge buffer, a local one is filled by h->predict_8x8_filter.  Then either
 * the lossless residual is stored directly, or the block goes through
 * DCT + quantization and, when any coefficient survives, is zigzag-scanned
 * into h->dct.luma8x8 and reconstructed.  The non_zero_count cache entries
 * of the block and h->mb.i_cbp_luma are updated along the way.
 */
static ALWAYS_INLINE void x264_mb_encode_i8x8( x264_t *h, int p, int idx, int i_qp, int i_mode, pixel *edge, int b_predict )
{
    const int x = idx&1;
    const int y = idx>>1;
    const int blk8 = p*4+idx;
    int nz;
    pixel *p_src = &h->mb.pic.p_fenc[p][8*x + 8*y*FENC_STRIDE];
    pixel *p_dst = &h->mb.pic.p_fdec[p][8*x + 8*y*FDEC_STRIDE];
    ALIGNED_ARRAY_64( dctcoef, dct8x8,[64] );
    ALIGNED_ARRAY_32( pixel, edge_buf,[36] );

    if( b_predict )
    {
        /* No edge supplied by the caller: build it locally. */
        if( !edge )
        {
            h->predict_8x8_filter( p_dst, edge_buf, h->mb.i_neighbour8[idx], x264_pred_i4x4_neighbors[i_mode] );
            edge = edge_buf;
        }

        if( h->mb.b_lossless )
            x264_predict_lossless_8x8( h, p_dst, p, idx, i_mode, edge );
        else
            h->predict_8x8[i_mode]( p_dst, edge );
    }

    if( h->mb.b_lossless )
    {
        /* Residual goes straight into the coefficient array; no transform. */
        nz = h->zigzagf.sub_8x8( h->dct.luma8x8[blk8], p_src, p_dst );
        STORE_8x8_NNZ( p, idx, nz );
        h->mb.i_cbp_luma |= nz<<idx;
        return;
    }

    h->dctf.sub8x8_dct8( dct8x8, p_src, p_dst );

    nz = x264_quant_8x8( h, dct8x8, i_qp, ctx_cat_plane[DCT_LUMA_8x8][p], 1, p, idx );
    if( !nz )
    {
        STORE_8x8_NNZ( p, idx, 0 );
        return;
    }

    h->mb.i_cbp_luma |= 1<<idx;
    h->zigzagf.scan_8x8( h->dct.luma8x8[blk8], dct8x8 );
    h->quantf.dequant_8x8( dct8x8, h->dequant8_mf[p?CQM_8IC:CQM_8IY], i_qp );
    h->dctf.add8x8_idct8( p_dst, dct8x8 );
    STORE_8x8_NNZ( p, idx, 1 );
}
|
||||
|
||||
#endif
|
||||
1355
encoder/me.c
Normal file
1355
encoder/me.c
Normal file
File diff suppressed because it is too large
Load Diff
111
encoder/me.h
Normal file
111
encoder/me.h
Normal file
@@ -0,0 +1,111 @@
|
||||
/*****************************************************************************
|
||||
* me.h: motion estimation
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2003-2025 x264 project
|
||||
*
|
||||
* Authors: Loren Merritt <lorenm@u.washington.edu>
|
||||
* Laurent Aimar <fenrir@via.ecp.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X264_ENCODER_ME_H
|
||||
#define X264_ENCODER_ME_H
|
||||
|
||||
#define COST_MAX (1<<28)
|
||||
#define COST_MAX64 (1ULL<<60)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* aligning the first member is a gcc hack to force the struct to be aligned,
|
||||
* as well as force sizeof(struct) to be a multiple of the alignment. */
|
||||
/* input */
|
||||
ALIGNED_64( int i_pixel ); /* PIXEL_WxH */
|
||||
uint16_t *p_cost_mv; /* lambda * nbits for each possible mv */
|
||||
int i_ref_cost;
|
||||
int i_ref;
|
||||
const x264_weight_t *weight;
|
||||
|
||||
pixel *p_fref[12];
|
||||
pixel *p_fref_w;
|
||||
pixel *p_fenc[3];
|
||||
uint16_t *integral;
|
||||
int i_stride[3];
|
||||
|
||||
ALIGNED_4( int16_t mvp[2] );
|
||||
|
||||
/* output */
|
||||
int cost_mv; /* lambda * nbits for the chosen mv */
|
||||
int cost; /* satd + lambda * nbits */
|
||||
ALIGNED_8( int16_t mv[2] );
|
||||
} ALIGNED_64( x264_me_t );
|
||||
|
||||
#define x264_me_search_ref x264_template(me_search_ref)
|
||||
void x264_me_search_ref( x264_t *h, x264_me_t *m, int16_t (*mvc)[2], int i_mvc, int *p_fullpel_thresh );
|
||||
#define x264_me_search( h, m, mvc, i_mvc )\
|
||||
x264_me_search_ref( h, m, mvc, i_mvc, NULL )
|
||||
|
||||
#define x264_me_refine_qpel x264_template(me_refine_qpel)
|
||||
void x264_me_refine_qpel( x264_t *h, x264_me_t *m );
|
||||
#define x264_me_refine_qpel_refdupe x264_template(me_refine_qpel_refdupe)
|
||||
void x264_me_refine_qpel_refdupe( x264_t *h, x264_me_t *m, int *p_halfpel_thresh );
|
||||
#define x264_me_refine_qpel_rd x264_template(me_refine_qpel_rd)
|
||||
void x264_me_refine_qpel_rd( x264_t *h, x264_me_t *m, int i_lambda2, int i4, int i_list );
|
||||
#define x264_me_refine_bidir_rd x264_template(me_refine_bidir_rd)
|
||||
void x264_me_refine_bidir_rd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight, int i8, int i_lambda2 );
|
||||
#define x264_me_refine_bidir_satd x264_template(me_refine_bidir_satd)
|
||||
void x264_me_refine_bidir_satd( x264_t *h, x264_me_t *m0, x264_me_t *m1, int i_weight );
|
||||
#define x264_rd_cost_part x264_template(rd_cost_part)
|
||||
uint64_t x264_rd_cost_part( x264_t *h, int i_lambda2, int i8, int i_pixel );
|
||||
|
||||
/* Conditional-copy helpers: when y compares better than x, store y into x and
 * copy each following (source) argument into the (destination) before it.
 *
 * Caveats for callers:
 *  - x and y are evaluated twice when the condition holds, so avoid arguments
 *    with side effects.
 *  - The expansions are a bare `if` (COPY1_IF_LT even carries its own trailing
 *    semicolon), so do not use them as the body of an if that has an `else`. */
#define COPY1_IF_LT(x,y)\
if( (y) < (x) )\
    (x) = (y);

#define COPY2_IF_LT(x,y,a,b)\
if( (y) < (x) )\
{\
    (x) = (y);\
    (a) = (b);\
}

#define COPY3_IF_LT(x,y,a,b,c,d)\
if( (y) < (x) )\
{\
    (x) = (y);\
    (a) = (b);\
    (c) = (d);\
}

#define COPY4_IF_LT(x,y,a,b,c,d,e,f)\
if( (y) < (x) )\
{\
    (x) = (y);\
    (a) = (b);\
    (c) = (d);\
    (e) = (f);\
}

/* Same as COPY2_IF_LT but keeps the larger value. */
#define COPY2_IF_GT(x,y,a,b)\
if( (y) > (x) )\
{\
    (x) = (y);\
    (a) = (b);\
}
|
||||
|
||||
#endif
|
||||
3134
encoder/ratecontrol.c
Normal file
3134
encoder/ratecontrol.c
Normal file
File diff suppressed because it is too large
Load Diff
87
encoder/ratecontrol.h
Normal file
87
encoder/ratecontrol.h
Normal file
@@ -0,0 +1,87 @@
|
||||
/*****************************************************************************
|
||||
* ratecontrol.h: ratecontrol
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2003-2025 x264 project
|
||||
*
|
||||
* Authors: Loren Merritt <lorenm@u.washington.edu>
|
||||
* Laurent Aimar <fenrir@via.ecp.fr>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X264_ENCODER_RATECONTROL_H
|
||||
#define X264_ENCODER_RATECONTROL_H
|
||||
|
||||
/* Completely arbitrary.  Ratecontrol lowers relative quality at higher framerates
 * and the reverse at lower framerates; this serves as the center of the curve.
 * Halve all the values for frame-packed 3D to compensate for the "doubled"
 * framerate.
 *
 * NOTE: these macros read h->param.i_frame_packing, so a variable named `h`
 * must be in scope wherever they are expanded. */
#define BASE_FRAME_DURATION (0.04f / ((h->param.i_frame_packing == 5)+1))

/* Arbitrary limitations as a sanity check. */
#define MAX_FRAME_DURATION (1.00f / ((h->param.i_frame_packing == 5)+1))
#define MIN_FRAME_DURATION (0.01f / ((h->param.i_frame_packing == 5)+1))

/* Clamp a frame duration f to [MIN_FRAME_DURATION, MAX_FRAME_DURATION]. */
#define CLIP_DURATION(f) x264_clip3f(f,MIN_FRAME_DURATION,MAX_FRAME_DURATION)
|
||||
|
||||
#define x264_ratecontrol_new x264_template(ratecontrol_new)
|
||||
int x264_ratecontrol_new ( x264_t * );
|
||||
#define x264_ratecontrol_delete x264_template(ratecontrol_delete)
|
||||
void x264_ratecontrol_delete( x264_t * );
|
||||
|
||||
#define x264_ratecontrol_init_reconfigurable x264_template(ratecontrol_init_reconfigurable)
|
||||
void x264_ratecontrol_init_reconfigurable( x264_t *h, int b_init );
|
||||
#define x264_encoder_reconfig_apply x264_template(encoder_reconfig_apply)
|
||||
int x264_encoder_reconfig_apply( x264_t *h, x264_param_t *param );
|
||||
|
||||
#define x264_adaptive_quant_frame x264_template(adaptive_quant_frame)
|
||||
void x264_adaptive_quant_frame( x264_t *h, x264_frame_t *frame, float *quant_offsets );
|
||||
#define x264_macroblock_tree_read x264_template(macroblock_tree_read)
|
||||
int x264_macroblock_tree_read( x264_t *h, x264_frame_t *frame, float *quant_offsets );
|
||||
#define x264_reference_build_list_optimal x264_template(reference_build_list_optimal)
|
||||
int x264_reference_build_list_optimal( x264_t *h );
|
||||
#define x264_thread_sync_ratecontrol x264_template(thread_sync_ratecontrol)
|
||||
void x264_thread_sync_ratecontrol( x264_t *cur, x264_t *prev, x264_t *next );
|
||||
#define x264_ratecontrol_zone_init x264_template(ratecontrol_zone_init)
|
||||
void x264_ratecontrol_zone_init( x264_t * );
|
||||
#define x264_ratecontrol_start x264_template(ratecontrol_start)
|
||||
void x264_ratecontrol_start( x264_t *, int i_force_qp, int overhead );
|
||||
#define x264_ratecontrol_slice_type x264_template(ratecontrol_slice_type)
|
||||
int x264_ratecontrol_slice_type( x264_t *, int i_frame );
|
||||
#define x264_ratecontrol_set_weights x264_template(ratecontrol_set_weights)
|
||||
void x264_ratecontrol_set_weights( x264_t *h, x264_frame_t *frm );
|
||||
#define x264_ratecontrol_mb x264_template(ratecontrol_mb)
|
||||
int x264_ratecontrol_mb( x264_t *, int bits );
|
||||
#define x264_ratecontrol_qp x264_template(ratecontrol_qp)
|
||||
int x264_ratecontrol_qp( x264_t * );
|
||||
#define x264_ratecontrol_mb_qp x264_template(ratecontrol_mb_qp)
|
||||
int x264_ratecontrol_mb_qp( x264_t *h );
|
||||
#define x264_ratecontrol_end x264_template(ratecontrol_end)
|
||||
int x264_ratecontrol_end( x264_t *, int bits, int *filler );
|
||||
#define x264_ratecontrol_summary x264_template(ratecontrol_summary)
|
||||
void x264_ratecontrol_summary( x264_t * );
|
||||
#define x264_rc_analyse_slice x264_template(rc_analyse_slice)
|
||||
int x264_rc_analyse_slice( x264_t *h );
|
||||
#define x264_threads_distribute_ratecontrol x264_template(threads_distribute_ratecontrol)
|
||||
void x264_threads_distribute_ratecontrol( x264_t *h );
|
||||
#define x264_threads_merge_ratecontrol x264_template(threads_merge_ratecontrol)
|
||||
void x264_threads_merge_ratecontrol( x264_t *h );
|
||||
#define x264_hrd_fullness x264_template(hrd_fullness)
|
||||
void x264_hrd_fullness( x264_t *h );
|
||||
|
||||
#endif
|
||||
1184
encoder/rdo.c
Normal file
1184
encoder/rdo.c
Normal file
File diff suppressed because it is too large
Load Diff
913
encoder/set.c
Normal file
913
encoder/set.c
Normal file
@@ -0,0 +1,913 @@
|
||||
/*****************************************************************************
|
||||
* set: header writing
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2003-2025 x264 project
|
||||
*
|
||||
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
|
||||
* Loren Merritt <lorenm@u.washington.edu>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common/common.h"
|
||||
#include "set.h"
|
||||
|
||||
/* Within this file every bs_write_ue() call resolves to bs_write_ue_big(). */
#define bs_write_ue bs_write_ue_big

// Indexed by pic_struct values
static const uint8_t num_clock_ts[10] = { 0, 1, 1, 1, 2, 2, 3, 3, 2, 3 };
/* 16-byte identifier; by its name, the UUID used for AVC-Intra user data. */
static const uint8_t avcintra_uuid[] = {0xF7, 0x49, 0x3E, 0xB3, 0xD4, 0x00, 0x47, 0x96, 0x86, 0x86, 0xC9, 0x70, 0x7B, 0x64, 0x37, 0x2A};
|
||||
|
||||
/* Transpose a w x w matrix of bytes in place.
 * buf is row-major with w*w entries; every element below the diagonal is
 * swapped with its mirror above it. */
static void transpose( uint8_t *buf, int w )
{
    for( int i = 0; i < w; i++ )
        for( int j = 0; j < i; j++ )
        {
            uint8_t tmp = buf[w*i+j];
            buf[w*i+j] = buf[w*j+i];
            buf[w*j+i] = tmp;
        }
}
|
||||
|
||||
/* Write scaling list number idx of sps to the bitstream s.
 *
 * idx < 4 selects a 16-entry (4x4) list, otherwise a 64-entry (8x8) list.
 * Three encodings are used, in order of preference:
 *  - a single 0 bit when the list equals its fallback (the matching luma list
 *    for chroma lists, the JVT list otherwise);
 *  - a 1 bit followed by se(-8) when the list equals the JVT list;
 *  - a 1 bit followed by the entries delta-coded in zigzag order. */
static void scaling_list_write( bs_t *s, x264_sps_t *sps, int idx )
{
    const int len = idx<4 ? 16 : 64;
    const uint8_t *zigzag = idx<4 ? x264_zigzag_scan4[0] : x264_zigzag_scan8[0];
    const uint8_t *list = sps->scaling_list[idx];
    const uint8_t *def_list = (idx==CQM_4IC) ? sps->scaling_list[CQM_4IY]
                            : (idx==CQM_4PC) ? sps->scaling_list[CQM_4PY]
                            : (idx==CQM_8IC+4) ? sps->scaling_list[CQM_8IY+4]
                            : (idx==CQM_8PC+4) ? sps->scaling_list[CQM_8PY+4]
                            : x264_cqm_jvt[idx];
    if( !memcmp( list, def_list, len ) )
        bs_write1( s, 0 );   // scaling_list_present_flag
    else if( !memcmp( list, x264_cqm_jvt[idx], len ) )
    {
        bs_write1( s, 1 );   // scaling_list_present_flag
        bs_write_se( s, -8 ); // use jvt list
    }
    else
    {
        int run;
        bs_write1( s, 1 );   // scaling_list_present_flag

        // try run-length compression of trailing values
        for( run = len; run > 1; run-- )
            if( list[zigzag[run-1]] != list[zigzag[run-2]] )
                break;
        /* Only cut the list short when the terminating code costs no more
         * bits than the skipped entries would. */
        if( run < len && len - run < bs_size_se( (int8_t)-list[zigzag[run]] ) )
            run = len;

        for( int j = 0; j < run; j++ )
            bs_write_se( s, (int8_t)(list[zigzag[j]] - (j>0 ? list[zigzag[j-1]] : 8)) ); // delta

        /* Terminator: a delta of minus the (constant) tail value, which brings
         * the running value to zero. */
        if( run < len )
            bs_write_se( s, (int8_t)-list[zigzag[run]] );
    }
}
|
||||
|
||||
/* Write one SEI message to s: payload type, payload size, then payload_size
 * bytes from payload, followed by the RBSP trailing bits.
 *
 * Type and size are each coded as a run of 0xFF bytes (one per full 255)
 * followed by one byte holding the remainder. */
void x264_sei_write( bs_t *s, uint8_t *payload, int payload_size, int payload_type )
{
    int i;

    bs_realign( s );

    for( i = 0; i <= payload_type-255; i += 255 )
        bs_write( s, 8, 255 );
    bs_write( s, 8, payload_type-i );

    for( i = 0; i <= payload_size-255; i += 255 )
        bs_write( s, 8, 255 );
    bs_write( s, 8, payload_size-i );

    for( i = 0; i < payload_size; i++ )
        bs_write( s, 8, payload[i] );

    bs_rbsp_trailing( s );
    bs_flush( s );
}
|
||||
|
||||
/* Fill in sequence parameter set sps with id i_id from the encoder
 * parameters in param.
 *
 * Derives frame size in macroblocks, chroma format, profile, constraint
 * flags, level, reference/reorder limits, frame_num and POC bit widths, and
 * the VUI fields. Crop and aspect-ratio fields are delegated to
 * x264_sps_init_reconfigurable(). */
void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param )
{
    int csp = param->i_csp & X264_CSP_MASK;

    sps->i_id = i_id;
    sps->i_mb_width = ( param->i_width + 15 ) / 16;
    sps->i_mb_height= ( param->i_height + 15 ) / 16;
    sps->b_frame_mbs_only = !(param->b_interlaced || param->b_fake_interlaced);
    /* With field coding the macroblock height is rounded up to an even count. */
    if( !sps->b_frame_mbs_only )
        sps->i_mb_height = ( sps->i_mb_height + 1 ) & ~1;
    sps->i_chroma_format_idc = csp >= X264_CSP_I444 ? CHROMA_444 :
                               csp >= X264_CSP_I422 ? CHROMA_422 :
                               csp >= X264_CSP_I420 ? CHROMA_420 : CHROMA_400;

    /* Pick the lowest profile that covers the features in use. */
    sps->b_qpprime_y_zero_transform_bypass = param->rc.i_rc_method == X264_RC_CQP && param->rc.i_qp_constant == 0;
    if( sps->b_qpprime_y_zero_transform_bypass || sps->i_chroma_format_idc == CHROMA_444 )
        sps->i_profile_idc  = PROFILE_HIGH444_PREDICTIVE;
    else if( sps->i_chroma_format_idc == CHROMA_422 )
        sps->i_profile_idc  = PROFILE_HIGH422;
    else if( BIT_DEPTH > 8 )
        sps->i_profile_idc  = PROFILE_HIGH10;
    else if( param->analyse.b_transform_8x8 || param->i_cqm_preset != X264_CQM_FLAT || sps->i_chroma_format_idc == CHROMA_400 )
        sps->i_profile_idc  = PROFILE_HIGH;
    else if( param->b_cabac || param->i_bframe > 0 || param->b_interlaced || param->b_fake_interlaced || param->analyse.i_weighted_pred > 0 )
        sps->i_profile_idc  = PROFILE_MAIN;
    else
        sps->i_profile_idc  = PROFILE_BASELINE;

    sps->b_constraint_set0  = sps->i_profile_idc == PROFILE_BASELINE;
    /* x264 doesn't support the features that are in Baseline and not in Main,
     * namely arbitrary_slice_order and slice_groups. */
    sps->b_constraint_set1  = sps->i_profile_idc <= PROFILE_MAIN;
    /* Never set constraint_set2, it is not necessary and not used in real world. */
    sps->b_constraint_set2  = 0;
    sps->b_constraint_set3  = 0;

    sps->i_level_idc = param->i_level_idc;
    if( param->i_level_idc == 9 && ( sps->i_profile_idc == PROFILE_BASELINE || sps->i_profile_idc == PROFILE_MAIN ) )
    {
        sps->b_constraint_set3 = 1; /* level 1b with Baseline or Main profile is signalled via constraint_set3 */
        sps->i_level_idc      = 11;
    }
    /* Intra profiles */
    if( param->i_keyint_max == 1 && sps->i_profile_idc >= PROFILE_HIGH )
        sps->b_constraint_set3 = 1;

    sps->vui.i_num_reorder_frames = param->i_bframe_pyramid ? 2 : param->i_bframe ? 1 : 0;
    /* extra slot with pyramid so that we don't have to override the
     * order of forgetting old pictures */
    sps->vui.i_max_dec_frame_buffering =
    sps->i_num_ref_frames = X264_MIN(X264_REF_MAX, X264_MAX4(param->i_frame_reference, 1 + sps->vui.i_num_reorder_frames,
                            param->i_bframe_pyramid ? 4 : 1, param->i_dpb_size));
    sps->i_num_ref_frames -= param->i_bframe_pyramid == X264_B_PYRAMID_STRICT;
    if( param->i_keyint_max == 1 )
    {
        sps->i_num_ref_frames = 0;
        sps->vui.i_max_dec_frame_buffering = 0;
    }

    /* number of refs + current frame */
    int max_frame_num = sps->vui.i_max_dec_frame_buffering * (!!param->i_bframe_pyramid+1) + 1;
    /* Intra refresh cannot write a recovery time greater than max frame num-1 */
    if( param->b_intra_refresh )
    {
        int time_to_recovery = X264_MIN( sps->i_mb_width - 1, param->i_keyint_max ) + param->i_bframe - 1;
        max_frame_num = X264_MAX( max_frame_num, time_to_recovery+1 );
    }

    /* Smallest width of at least 4 bits such that 2^width > max_frame_num. */
    sps->i_log2_max_frame_num = 4;
    while( (1 << sps->i_log2_max_frame_num) <= max_frame_num )
        sps->i_log2_max_frame_num++;

    sps->i_poc_type = param->i_bframe || param->b_interlaced || param->i_avcintra_class ? 0 : 2;
    if( sps->i_poc_type == 0 )
    {
        int max_delta_poc = (param->i_bframe + 2) * (!!param->i_bframe_pyramid + 1) * 2;
        sps->i_log2_max_poc_lsb = 4;
        while( (1 << sps->i_log2_max_poc_lsb) <= max_delta_poc * 2 )
            sps->i_log2_max_poc_lsb++;
    }

    sps->b_vui = 1;

    sps->b_gaps_in_frame_num_value_allowed = 0;
    sps->b_mb_adaptive_frame_field = param->b_interlaced;
    sps->b_direct8x8_inference = 1;

    x264_sps_init_reconfigurable( sps, param );

    sps->vui.b_overscan_info_present = param->vui.i_overscan > 0 && param->vui.i_overscan <= 2;
    if( sps->vui.b_overscan_info_present )
        sps->vui.b_overscan_info = ( param->vui.i_overscan == 2 ? 1 : 0 );

    /* Out-of-range colour/format parameters fall back to the defaults given
     * as the last operand of each conditional below. */
    sps->vui.b_signal_type_present = 0;
    sps->vui.i_vidformat = ( param->vui.i_vidformat >= 0 && param->vui.i_vidformat <= 5 ? param->vui.i_vidformat : 5 );
    sps->vui.b_fullrange = ( param->vui.b_fullrange >= 0 && param->vui.b_fullrange <= 1 ? param->vui.b_fullrange :
                           ( csp >= X264_CSP_BGR ? 1 : 0 ) );
    sps->vui.b_color_description_present = 0;

    sps->vui.i_colorprim = ( param->vui.i_colorprim >= 0 && param->vui.i_colorprim <= 12 ? param->vui.i_colorprim : 2 );
    sps->vui.i_transfer  = ( param->vui.i_transfer  >= 0 && param->vui.i_transfer  <= 18 ? param->vui.i_transfer  : 2 );
    sps->vui.i_colmatrix = ( param->vui.i_colmatrix >= 0 && param->vui.i_colmatrix <= 14 ? param->vui.i_colmatrix :
                           ( csp >= X264_CSP_BGR ? 0 : 2 ) );
    if( sps->vui.i_colorprim != 2 || sps->vui.i_transfer != 2 || sps->vui.i_colmatrix != 2 )
        sps->vui.b_color_description_present = 1;

    if( sps->vui.i_vidformat != 5 || sps->vui.b_fullrange || sps->vui.b_color_description_present )
        sps->vui.b_signal_type_present = 1;

    /* FIXME: not sufficient for interlaced video */
    sps->vui.b_chroma_loc_info_present = param->vui.i_chroma_loc > 0 && param->vui.i_chroma_loc <= 5 &&
                                         sps->i_chroma_format_idc == CHROMA_420;
    if( sps->vui.b_chroma_loc_info_present )
    {
        sps->vui.i_chroma_loc_top = param->vui.i_chroma_loc;
        sps->vui.i_chroma_loc_bottom = param->vui.i_chroma_loc;
    }

    sps->vui.b_timing_info_present = param->i_timebase_num > 0 && param->i_timebase_den > 0;

    if( sps->vui.b_timing_info_present )
    {
        sps->vui.i_num_units_in_tick = param->i_timebase_num;
        sps->vui.i_time_scale = param->i_timebase_den * 2;
        sps->vui.b_fixed_frame_rate = !param->b_vfr_input;
    }

    sps->vui.b_vcl_hrd_parameters_present = 0; // we don't support VCL HRD
    sps->vui.b_nal_hrd_parameters_present = !!param->i_nal_hrd;
    sps->vui.b_pic_struct_present = param->b_pic_struct;

    // NOTE: HRD related parts of the SPS are initialised in x264_ratecontrol_init_reconfigurable

    sps->vui.b_bitstream_restriction = !(sps->b_constraint_set3 && sps->i_profile_idc >= PROFILE_HIGH);
    if( sps->vui.b_bitstream_restriction )
    {
        sps->vui.b_motion_vectors_over_pic_boundaries = 1;
        sps->vui.i_max_bytes_per_pic_denom = 0;
        sps->vui.i_max_bits_per_mb_denom = 0;
        sps->vui.i_log2_max_mv_length_horizontal =
        sps->vui.i_log2_max_mv_length_vertical = (int)log2f( X264_MAX( 1, param->analyse.i_mv_range*4-1 ) ) + 1;
    }

    sps->b_avcintra_hd = param->i_avcintra_class && param->i_avcintra_class <= 200;
    sps->b_avcintra_4k = param->i_avcintra_class > 200;
    sps->i_cqm_preset = param->i_cqm_preset;
}
|
||||
|
||||
/* Set the crop rectangle and sample-aspect-ratio fields of sps from param.
 *
 * Reads sps->i_mb_width and sps->i_mb_height, so those must already be set.
 * Right/bottom crop also include the padding between the frame size in param
 * and the macroblock-aligned size. */
void x264_sps_init_reconfigurable( x264_sps_t *sps, x264_param_t *param )
{
    sps->crop.i_left   = param->crop_rect.i_left;
    sps->crop.i_top    = param->crop_rect.i_top;
    sps->crop.i_right  = param->crop_rect.i_right + sps->i_mb_width*16 - param->i_width;
    sps->crop.i_bottom = param->crop_rect.i_bottom + sps->i_mb_height*16 - param->i_height;
    sps->b_crop = sps->crop.i_left  || sps->crop.i_top ||
                  sps->crop.i_right || sps->crop.i_bottom;

    /* The aspect ratio is only signalled when both SAR terms are positive. */
    sps->vui.b_aspect_ratio_info_present = 0;
    if( param->vui.i_sar_width > 0 && param->vui.i_sar_height > 0 )
    {
        sps->vui.b_aspect_ratio_info_present = 1;
        sps->vui.i_sar_width = param->vui.i_sar_width;
        sps->vui.i_sar_height= param->vui.i_sar_height;
    }
}
|
||||
|
||||
/* Point the eight sps->scaling_list entries at the matrices selected by
 * sps->i_cqm_preset (flat, JVT, or the custom matrices held in param).
 *
 * NOTE: for X264_CQM_CUSTOM the matrices inside param are transposed in
 * place, and sps keeps pointers into param rather than copies. */
void x264_sps_init_scaling_list( x264_sps_t *sps, x264_param_t *param )
{
    switch( sps->i_cqm_preset )
    {
    case X264_CQM_FLAT:
        for( int i = 0; i < 8; i++ )
            sps->scaling_list[i] = x264_cqm_flat16;
        break;
    case X264_CQM_JVT:
        for( int i = 0; i < 8; i++ )
            sps->scaling_list[i] = x264_cqm_jvt[i];
        break;
    case X264_CQM_CUSTOM:
        /* match the transposed DCT & zigzag */
        transpose( param->cqm_4iy, 4 );
        transpose( param->cqm_4py, 4 );
        transpose( param->cqm_4ic, 4 );
        transpose( param->cqm_4pc, 4 );
        transpose( param->cqm_8iy, 8 );
        transpose( param->cqm_8py, 8 );
        transpose( param->cqm_8ic, 8 );
        transpose( param->cqm_8pc, 8 );
        sps->scaling_list[CQM_4IY] = param->cqm_4iy;
        sps->scaling_list[CQM_4PY] = param->cqm_4py;
        sps->scaling_list[CQM_4IC] = param->cqm_4ic;
        sps->scaling_list[CQM_4PC] = param->cqm_4pc;
        sps->scaling_list[CQM_8IY+4] = param->cqm_8iy;
        sps->scaling_list[CQM_8PY+4] = param->cqm_8py;
        sps->scaling_list[CQM_8IC+4] = param->cqm_8ic;
        sps->scaling_list[CQM_8PC+4] = param->cqm_8pc;
        /* A custom list containing a zero entry is replaced by the JVT list
         * for that slot. */
        for( int i = 0; i < 8; i++ )
            for( int j = 0; j < (i < 4 ? 16 : 64); j++ )
                if( sps->scaling_list[i][j] == 0 )
                    sps->scaling_list[i] = x264_cqm_jvt[i];
        break;
    }
}
|
||||
|
||||
/* Serialise the sequence parameter set sps into bitstream s.
 *
 * Fields are emitted in a fixed order: profile/constraint/level header, the
 * high-profile extension (chroma format, bit depth, optional AVC-Intra
 * scaling lists), frame_num/POC/reference settings, frame dimensions,
 * cropping, and finally the VUI block. The RBSP trailing bits are appended
 * and the stream is flushed before returning. */
void x264_sps_write( bs_t *s, x264_sps_t *sps )
{
    bs_realign( s );
    bs_write( s, 8, sps->i_profile_idc );
    bs_write1( s, sps->b_constraint_set0 );
    bs_write1( s, sps->b_constraint_set1 );
    bs_write1( s, sps->b_constraint_set2 );
    bs_write1( s, sps->b_constraint_set3 );

    bs_write( s, 4, 0 );    /* reserved */

    bs_write( s, 8, sps->i_level_idc );

    bs_write_ue( s, sps->i_id );

    if( sps->i_profile_idc >= PROFILE_HIGH )
    {
        bs_write_ue( s, sps->i_chroma_format_idc );
        if( sps->i_chroma_format_idc == CHROMA_444 )
            bs_write1( s, 0 ); // separate_colour_plane_flag
        bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_luma_minus8
        bs_write_ue( s, BIT_DEPTH-8 ); // bit_depth_chroma_minus8
        bs_write1( s, sps->b_qpprime_y_zero_transform_bypass );
        /* Exactly match the AVC-Intra bitstream */
        bs_write1( s, sps->b_avcintra_hd ); // seq_scaling_matrix_present_flag
        if( sps->b_avcintra_hd )
        {
            scaling_list_write( s, sps, CQM_4IY );
            scaling_list_write( s, sps, CQM_4IC );
            scaling_list_write( s, sps, CQM_4IC );
            bs_write1( s, 0 ); // no inter
            bs_write1( s, 0 ); // no inter
            bs_write1( s, 0 ); // no inter
            scaling_list_write( s, sps, CQM_8IY+4 );
            bs_write1( s, 0 ); // no inter
            if( sps->i_chroma_format_idc == CHROMA_444 )
            {
                scaling_list_write( s, sps, CQM_8IC+4 );
                bs_write1( s, 0 ); // no inter
                scaling_list_write( s, sps, CQM_8IC+4 );
                bs_write1( s, 0 ); // no inter
            }
        }
    }

    bs_write_ue( s, sps->i_log2_max_frame_num - 4 );
    bs_write_ue( s, sps->i_poc_type );
    if( sps->i_poc_type == 0 )
        bs_write_ue( s, sps->i_log2_max_poc_lsb - 4 );
    bs_write_ue( s, sps->i_num_ref_frames );
    bs_write1( s, sps->b_gaps_in_frame_num_value_allowed );
    bs_write_ue( s, sps->i_mb_width - 1 );
    /* Height is written in units of two macroblock rows when field coding
     * is enabled. */
    bs_write_ue( s, (sps->i_mb_height >> !sps->b_frame_mbs_only) - 1);
    bs_write1( s, sps->b_frame_mbs_only );
    if( !sps->b_frame_mbs_only )
        bs_write1( s, sps->b_mb_adaptive_frame_field );
    bs_write1( s, sps->b_direct8x8_inference );

    bs_write1( s, sps->b_crop );
    if( sps->b_crop )
    {
        /* Crop offsets are scaled down according to chroma subsampling and,
         * vertically, field coding. */
        int h_shift = sps->i_chroma_format_idc == CHROMA_420 || sps->i_chroma_format_idc == CHROMA_422;
        int v_shift = (sps->i_chroma_format_idc == CHROMA_420) + !sps->b_frame_mbs_only;
        bs_write_ue( s, sps->crop.i_left   >> h_shift );
        bs_write_ue( s, sps->crop.i_right  >> h_shift );
        bs_write_ue( s, sps->crop.i_top    >> v_shift );
        bs_write_ue( s, sps->crop.i_bottom >> v_shift );
    }

    bs_write1( s, sps->b_vui );
    if( sps->b_vui )
    {
        bs_write1( s, sps->vui.b_aspect_ratio_info_present );
        if( sps->vui.b_aspect_ratio_info_present )
        {
            int i;
            static const struct { uint8_t w, h, sar; } sar[] =
            {
                // aspect_ratio_idc = 0 -> unspecified
                {  1,  1, 1 }, { 12, 11, 2 }, { 10, 11, 3 }, { 16, 11, 4 },
                { 40, 33, 5 }, { 24, 11, 6 }, { 20, 11, 7 }, { 32, 11, 8 },
                { 80, 33, 9 }, { 18, 11, 10}, { 15, 11, 11}, { 64, 33, 12},
                {160, 99, 13}, {  4,  3, 14}, {  3,  2, 15}, {  2,  1, 16},
                // aspect_ratio_idc = [17..254] -> reserved
                { 0, 0, 255 }
            };
            /* Look for a predefined ratio; the loop stops on the 255
             * sentinel entry when none matches. */
            for( i = 0; sar[i].sar != 255; i++ )
            {
                if( sar[i].w == sps->vui.i_sar_width &&
                    sar[i].h == sps->vui.i_sar_height )
                    break;
            }
            bs_write( s, 8, sar[i].sar );
            if( sar[i].sar == 255 ) /* aspect_ratio_idc (extended) */
            {
                bs_write( s, 16, sps->vui.i_sar_width );
                bs_write( s, 16, sps->vui.i_sar_height );
            }
        }

        bs_write1( s, sps->vui.b_overscan_info_present );
        if( sps->vui.b_overscan_info_present )
            bs_write1( s, sps->vui.b_overscan_info );

        bs_write1( s, sps->vui.b_signal_type_present );
        if( sps->vui.b_signal_type_present )
        {
            bs_write( s, 3, sps->vui.i_vidformat );
            bs_write1( s, sps->vui.b_fullrange );
            bs_write1( s, sps->vui.b_color_description_present );
            if( sps->vui.b_color_description_present )
            {
                bs_write( s, 8, sps->vui.i_colorprim );
                bs_write( s, 8, sps->vui.i_transfer );
                bs_write( s, 8, sps->vui.i_colmatrix );
            }
        }

        bs_write1( s, sps->vui.b_chroma_loc_info_present );
        if( sps->vui.b_chroma_loc_info_present )
        {
            bs_write_ue( s, sps->vui.i_chroma_loc_top );
            bs_write_ue( s, sps->vui.i_chroma_loc_bottom );
        }

        bs_write1( s, sps->vui.b_timing_info_present );
        if( sps->vui.b_timing_info_present )
        {
            bs_write32( s, sps->vui.i_num_units_in_tick );
            bs_write32( s, sps->vui.i_time_scale );
            bs_write1( s, sps->vui.b_fixed_frame_rate );
        }

        bs_write1( s, sps->vui.b_nal_hrd_parameters_present );
        if( sps->vui.b_nal_hrd_parameters_present )
        {
            bs_write_ue( s, sps->vui.hrd.i_cpb_cnt - 1 );
            bs_write( s, 4, sps->vui.hrd.i_bit_rate_scale );
            bs_write( s, 4, sps->vui.hrd.i_cpb_size_scale );

            bs_write_ue( s, sps->vui.hrd.i_bit_rate_value - 1 );
            bs_write_ue( s, sps->vui.hrd.i_cpb_size_value - 1 );

            bs_write1( s, sps->vui.hrd.b_cbr_hrd );

            bs_write( s, 5, sps->vui.hrd.i_initial_cpb_removal_delay_length - 1 );
            bs_write( s, 5, sps->vui.hrd.i_cpb_removal_delay_length - 1 );
            bs_write( s, 5, sps->vui.hrd.i_dpb_output_delay_length - 1 );
            bs_write( s, 5, sps->vui.hrd.i_time_offset_length );
        }

        bs_write1( s, sps->vui.b_vcl_hrd_parameters_present );

        if( sps->vui.b_nal_hrd_parameters_present || sps->vui.b_vcl_hrd_parameters_present )
            bs_write1( s, 0 );   /* low_delay_hrd_flag */

        bs_write1( s, sps->vui.b_pic_struct_present );
        bs_write1( s, sps->vui.b_bitstream_restriction );
        if( sps->vui.b_bitstream_restriction )
        {
            bs_write1( s, sps->vui.b_motion_vectors_over_pic_boundaries );
            bs_write_ue( s, sps->vui.i_max_bytes_per_pic_denom );
            bs_write_ue( s, sps->vui.i_max_bits_per_mb_denom );
            bs_write_ue( s, sps->vui.i_log2_max_mv_length_horizontal );
            bs_write_ue( s, sps->vui.i_log2_max_mv_length_vertical );
            bs_write_ue( s, sps->vui.i_num_reorder_frames );
            bs_write_ue( s, sps->vui.i_max_dec_frame_buffering );
        }
    }

    bs_rbsp_trailing( s );
    bs_flush( s );
}
|
||||
|
||||
/* Fill in picture parameter set pps with id i_id, referring to sps, from the
 * encoder parameters in param. */
void x264_pps_init( x264_pps_t *pps, int i_id, x264_param_t *param, x264_sps_t *sps )
{
    pps->i_id = i_id;
    pps->i_sps_id = sps->i_id;
    pps->b_cabac = param->b_cabac;

    pps->b_pic_order = !param->i_avcintra_class && param->b_interlaced;
    pps->i_num_slice_groups = 1;

    pps->i_num_ref_idx_l0_default_active = param->i_frame_reference;
    pps->i_num_ref_idx_l1_default_active = 1;

    pps->b_weighted_pred = param->analyse.i_weighted_pred > 0;
    /* Stored as 0 or 2; x264_pps_write() emits this as a 2-bit field. */
    pps->b_weighted_bipred = param->analyse.b_weighted_bipred ? 2 : 0;

    /* ABR and stitchable streams use the fixed value 26 + QP_BD_OFFSET;
     * otherwise the constant QP is used. */
    pps->i_pic_init_qp = param->rc.i_rc_method == X264_RC_ABR || param->b_stitchable ? 26 + QP_BD_OFFSET : SPEC_QP( param->rc.i_qp_constant );
    pps->i_pic_init_qs = 26 + QP_BD_OFFSET;

    pps->i_chroma_qp_index_offset = param->analyse.i_chroma_qp_offset;
    pps->b_deblocking_filter_control = 1;
    pps->b_constrained_intra_pred = param->b_constrained_intra;
    pps->b_redundant_pic_cnt = 0;

    pps->b_transform_8x8_mode = param->analyse.b_transform_8x8 ? 1 : 0;
}
|
||||
|
||||
/* Serialise picture parameter set pps into bitstream s.
 *
 * sps supplies the scaling lists, the AVC-Intra flags and the chroma format.
 * The optional tail (8x8 transform flag, scaling lists, second chroma QP
 * offset) is written only when the 8x8 transform or a non-flat scaling
 * matrix is in use. RBSP trailing bits are appended and the stream is
 * flushed before returning. */
void x264_pps_write( bs_t *s, x264_sps_t *sps, x264_pps_t *pps )
{
    bs_realign( s );
    bs_write_ue( s, pps->i_id );
    bs_write_ue( s, pps->i_sps_id );

    bs_write1( s, pps->b_cabac );
    bs_write1( s, pps->b_pic_order );
    bs_write_ue( s, pps->i_num_slice_groups - 1 );

    bs_write_ue( s, pps->i_num_ref_idx_l0_default_active - 1 );
    bs_write_ue( s, pps->i_num_ref_idx_l1_default_active - 1 );
    bs_write1( s, pps->b_weighted_pred );
    bs_write( s, 2, pps->b_weighted_bipred );

    bs_write_se( s, pps->i_pic_init_qp - 26 - QP_BD_OFFSET );
    bs_write_se( s, pps->i_pic_init_qs - 26 - QP_BD_OFFSET );
    bs_write_se( s, pps->i_chroma_qp_index_offset );

    bs_write1( s, pps->b_deblocking_filter_control );
    bs_write1( s, pps->b_constrained_intra_pred );
    bs_write1( s, pps->b_redundant_pic_cnt );

    /* In the AVC-Intra HD case the lists are written by x264_sps_write()
     * instead, so they are not repeated here. */
    int b_scaling_list = !sps->b_avcintra_hd && sps->i_cqm_preset != X264_CQM_FLAT;
    if( pps->b_transform_8x8_mode || b_scaling_list )
    {
        bs_write1( s, pps->b_transform_8x8_mode );
        bs_write1( s, b_scaling_list );
        if( b_scaling_list )
        {
            scaling_list_write( s, sps, CQM_4IY );
            scaling_list_write( s, sps, CQM_4IC );
            if( sps->b_avcintra_4k )
            {
                scaling_list_write( s, sps, CQM_4IC );
                bs_write1( s, 0 ); // no inter
                bs_write1( s, 0 ); // no inter
                bs_write1( s, 0 ); // no inter
            }
            else
            {
                bs_write1( s, 0 ); // Cr = Cb
                scaling_list_write( s, sps, CQM_4PY );
                scaling_list_write( s, sps, CQM_4PC );
                bs_write1( s, 0 ); // Cr = Cb
            }
            if( pps->b_transform_8x8_mode )
            {
                scaling_list_write( s, sps, CQM_8IY+4 );
                if( sps->b_avcintra_4k )
                    bs_write1( s, 0 ); // no inter
                else
                    scaling_list_write( s, sps, CQM_8PY+4 );
                if( sps->i_chroma_format_idc == CHROMA_444 )
                {
                    scaling_list_write( s, sps, CQM_8IC+4 );
                    scaling_list_write( s, sps, CQM_8PC+4 );
                    bs_write1( s, 0 ); // Cr = Cb
                    bs_write1( s, 0 ); // Cr = Cb
                }
            }
        }
        /* Second chroma QP offset; the same value as the first is used. */
        bs_write_se( s, pps->i_chroma_qp_index_offset );
    }

    bs_rbsp_trailing( s );
    bs_flush( s );
}
|
||||
|
||||
/* Build a recovery-point SEI payload in a local buffer and emit it to s via
 * x264_sei_write(). recovery_frame_cnt is written as-is; the remaining flags
 * are constants. h is not used by this function. */
void x264_sei_recovery_point_write( x264_t *h, bs_t *s, int recovery_frame_cnt )
{
    bs_t q;
    ALIGNED_4( uint8_t tmp_buf[100] );
    M32( tmp_buf ) = 0; // shut up gcc
    bs_init( &q, tmp_buf, 100 );

    bs_realign( &q );

    bs_write_ue( &q, recovery_frame_cnt ); // recovery_frame_cnt
    bs_write1( &q, 1 );   //exact_match_flag 1
    bs_write1( &q, 0 );   //broken_link_flag 0
    bs_write( &q, 2, 0 ); //changing_slice_group 0

    bs_align_10( &q );

    /* bs_pos() is divided by 8 to pass the payload size in bytes. */
    x264_sei_write( s, tmp_buf, bs_pos( &q ) / 8, SEI_RECOVERY_POINT );
}
|
||||
|
||||
/* Emit an unregistered-user-data SEI holding a 16-byte UUID followed by a
 * NUL-terminated text banner (build number, version, licence, URL and the
 * option string from x264_param2string()).
 *
 * Returns 0 on success and -1 if the option string or the payload buffer
 * cannot be allocated. */
int x264_sei_version_write( x264_t *h, bs_t *s )
{
    // random ID number generated according to ISO-11578
    static const uint8_t uuid[16] =
    {
        0xdc, 0x45, 0xe9, 0xbd, 0xe6, 0xd9, 0x48, 0xb7,
        0x96, 0x2c, 0xd8, 0x20, 0xd9, 0x23, 0xee, 0xef
    };
    char *opts = x264_param2string( &h->param, 0 );
    char *payload;
    int length;
    size_t payload_size;

    if( !opts )
        return -1;
    payload_size = 200 + strlen( opts );
    CHECKED_MALLOC( payload, payload_size );

    memcpy( payload, uuid, 16 );
    /* Bounded write: the fixed text plus X264_VERSION is expected to fit in
     * the 200 spare bytes, but snprintf guarantees an unusually long version
     * string can only truncate the banner, never overrun the buffer. */
    snprintf( payload+16, payload_size-16, "x264 - core %d%s - H.264/MPEG-4 AVC codec - "
              "Copy%s 2003-2025 - http://www.videolan.org/x264.html - options: %s",
              X264_BUILD, X264_VERSION, HAVE_GPL?"left":"right", opts );
    /* The UUID contains no zero byte, so strlen() runs through it to the
     * terminator of the text; +1 includes that terminator in the payload. */
    length = strlen(payload)+1;

    x264_sei_write( s, (uint8_t *)payload, length, SEI_USER_DATA_UNREGISTERED );

    x264_free( opts );
    x264_free( payload );
    return 0;
fail:
    x264_free( opts );
    return -1;
}
|
||||
|
||||
/* Build a buffering period SEI payload and pass it to x264_sei_write().
 *
 * The payload starts with the SPS id; the initial CPB removal delay and its
 * offset are appended only when the SPS signals NAL HRD parameters. */
void x264_sei_buffering_period_write( x264_t *h, bs_t *s )
{
    x264_sps_t *sps = h->sps;
    ALIGNED_4( uint8_t payload[100] );
    bs_t pbs;

    M32( payload ) = 0; /* initialise the first word to keep gcc quiet */
    bs_init( &pbs, payload, sizeof(payload) );
    bs_realign( &pbs );

    bs_write_ue( &pbs, sps->i_id );

    if( sps->vui.b_nal_hrd_parameters_present )
    {
        /* Both fields share the same bit width, taken from the HRD parameters. */
        bs_write( &pbs, sps->vui.hrd.i_initial_cpb_removal_delay_length, h->initial_cpb_removal_delay );
        bs_write( &pbs, sps->vui.hrd.i_initial_cpb_removal_delay_length, h->initial_cpb_removal_delay_offset );
    }

    bs_align_10( &pbs );

    int payload_bytes = bs_pos( &pbs ) / 8;
    x264_sei_write( s, payload, payload_bytes, SEI_BUFFERING_PERIOD );
}
|
||||
|
||||
/* Build a picture timing SEI payload for the frame currently being encoded
 * (h->fenc) and pass it to x264_sei_write().
 *
 * CPB removal / DPB output delays are written when either NAL or VCL HRD
 * parameters are present; pic_struct and its clock timestamp flags are
 * written when the SPS signals pic_struct. */
void x264_sei_pic_timing_write( x264_t *h, bs_t *s )
{
    x264_sps_t *sps = h->sps;
    ALIGNED_4( uint8_t payload[100] );
    bs_t pbs;

    M32( payload ) = 0; /* initialise the first word to keep gcc quiet */
    bs_init( &pbs, payload, sizeof(payload) );
    bs_realign( &pbs );

    int b_hrd = sps->vui.b_nal_hrd_parameters_present || sps->vui.b_vcl_hrd_parameters_present;
    if( b_hrd )
    {
        bs_write( &pbs, sps->vui.hrd.i_cpb_removal_delay_length, h->fenc->i_cpb_delay - h->i_cpb_delay_pir_offset );
        bs_write( &pbs, sps->vui.hrd.i_dpb_output_delay_length, h->fenc->i_dpb_output_delay );
    }

    if( sps->vui.b_pic_struct_present )
    {
        /* Index 0 is used internally for "Auto", hence the -1. */
        bs_write( &pbs, 4, h->fenc->i_pic_struct-1 );

        /* These clock timestamps are not standardised so we don't set them.
         * They could be time of origin, capture or alternative ideal display. */
        for( int remaining = num_clock_ts[h->fenc->i_pic_struct]; remaining > 0; remaining-- )
            bs_write1( &pbs, 0 ); /* clock_timestamp_flag */
    }

    bs_align_10( &pbs );

    int payload_bytes = bs_pos( &pbs ) / 8;
    x264_sei_write( s, payload, payload_bytes, SEI_PIC_TIMING );
}
|
||||
|
||||
/* Build a frame packing arrangement SEI payload from
 * h->param.i_frame_packing and pass it to x264_sei_write().
 *
 * Arrangement type 0 sets the quincunx sampling flag, type 5 drives
 * current_frame_is_frame0_flag from the parity of the frame number, and the
 * grid position fields are written for every type other than 0 and 5. */
void x264_sei_frame_packing_write( x264_t *h, bs_t *s )
{
    const int packing = h->param.i_frame_packing;
    const int b_quincunx = packing == 0;
    const int b_alternating = packing == 5;
    ALIGNED_4( uint8_t payload[100] );
    bs_t pbs;

    M32( payload ) = 0; /* initialise the first word to keep gcc quiet */
    bs_init( &pbs, payload, sizeof(payload) );
    bs_realign( &pbs );

    bs_write_ue( &pbs, 0 );          /* frame_packing_arrangement_id */
    bs_write1( &pbs, 0 );            /* frame_packing_arrangement_cancel_flag */
    bs_write( &pbs, 7, packing );    /* frame_packing_arrangement_type */
    bs_write1( &pbs, b_quincunx );   /* quincunx_sampling_flag */

    /* 0: views are unrelated, 1: left view is on the left, 2: left view is on the right */
    bs_write( &pbs, 6, packing != 6 ); /* content_interpretation_type */

    bs_write1( &pbs, 0 );            /* spatial_flipping_flag */
    bs_write1( &pbs, 0 );            /* frame0_flipped_flag */
    bs_write1( &pbs, 0 );            /* field_views_flag */
    bs_write1( &pbs, b_alternating && !(h->fenc->i_frame&1) ); /* current_frame_is_frame0_flag */
    bs_write1( &pbs, 0 );            /* frame0_self_contained_flag */
    bs_write1( &pbs, 0 );            /* frame1_self_contained_flag */

    if( !b_quincunx && !b_alternating )
    {
        /* frame0_grid_position_x, frame0_grid_position_y,
         * frame1_grid_position_x, frame1_grid_position_y: 4 bits each, all 0 */
        for( int field = 0; field < 4; field++ )
            bs_write( &pbs, 4, 0 );
    }

    bs_write( &pbs, 8, 0 );          /* frame_packing_arrangement_reserved_byte */

    /* "frame_packing_arrangement_repetition_period equal to 1 specifies that the frame packing arrangement SEI message persists in output"
     * for (i_frame_packing == 5) this will undermine current_frame_is_frame0_flag which must alternate every view sequence */
    bs_write_ue( &pbs, !b_alternating ); /* frame_packing_arrangement_repetition_period */
    bs_write1( &pbs, 0 );            /* frame_packing_arrangement_extension_flag */

    bs_align_10( &pbs );

    int payload_bytes = bs_pos( &pbs ) / 8;
    x264_sei_write( s, payload, payload_bytes, SEI_FRAME_PACKING );
}
|
||||
|
||||
/* Build a mastering display SEI payload from h->param.mastering_display and
 * pass it to x264_sei_write().
 *
 * Layout: eight 16-bit chromaticity fields in the order green, blue, red,
 * white point (x then y for each), followed by the 32-bit display maximum
 * and minimum. */
void x264_sei_mastering_display_write( x264_t *h, bs_t *s )
{
    ALIGNED_4( uint8_t payload[100] );
    bs_t pbs;

    M32( payload ) = 0; /* initialise the first word to keep gcc quiet */
    bs_init( &pbs, payload, sizeof(payload) );
    bs_realign( &pbs );

    /* Primaries: green, blue, red */
    bs_write( &pbs, 16, h->param.mastering_display.i_green_x );
    bs_write( &pbs, 16, h->param.mastering_display.i_green_y );
    bs_write( &pbs, 16, h->param.mastering_display.i_blue_x );
    bs_write( &pbs, 16, h->param.mastering_display.i_blue_y );
    bs_write( &pbs, 16, h->param.mastering_display.i_red_x );
    bs_write( &pbs, 16, h->param.mastering_display.i_red_y );

    /* White point */
    bs_write( &pbs, 16, h->param.mastering_display.i_white_x );
    bs_write( &pbs, 16, h->param.mastering_display.i_white_y );

    /* Display range */
    bs_write32( &pbs, h->param.mastering_display.i_display_max );
    bs_write32( &pbs, h->param.mastering_display.i_display_min );

    bs_align_10( &pbs );

    int payload_bytes = bs_pos( &pbs ) / 8;
    x264_sei_write( s, payload, payload_bytes, SEI_MASTERING_DISPLAY );
}
|
||||
|
||||
/* Build a content light level SEI payload (two 16-bit fields taken from
 * h->param.content_light_level: i_max_cll then i_max_fall) and pass it to
 * x264_sei_write(). */
void x264_sei_content_light_level_write( x264_t *h, bs_t *s )
{
    ALIGNED_4( uint8_t payload[100] );
    bs_t pbs;

    M32( payload ) = 0; /* initialise the first word to keep gcc quiet */
    bs_init( &pbs, payload, sizeof(payload) );
    bs_realign( &pbs );

    bs_write( &pbs, 16, h->param.content_light_level.i_max_cll );
    bs_write( &pbs, 16, h->param.content_light_level.i_max_fall );

    bs_align_10( &pbs );

    int payload_bytes = bs_pos( &pbs ) / 8;
    x264_sei_write( s, payload, payload_bytes, SEI_CONTENT_LIGHT_LEVEL );
}
|
||||
|
||||
/* Build an alternative transfer characteristics SEI payload (a single byte
 * holding h->param.i_alternative_transfer) and pass it to x264_sei_write(). */
void x264_sei_alternative_transfer_write( x264_t *h, bs_t *s )
{
    ALIGNED_4( uint8_t payload[100] );
    bs_t pbs;

    M32( payload ) = 0; /* initialise the first word to keep gcc quiet */
    bs_init( &pbs, payload, sizeof(payload) );
    bs_realign( &pbs );

    bs_write( &pbs, 8, h->param.i_alternative_transfer ); /* preferred_transfer_characteristics */

    bs_align_10( &pbs );

    int payload_bytes = bs_pos( &pbs ) / 8;
    x264_sei_write( s, payload, payload_bytes, SEI_ALTERNATIVE_TRANSFER );
}
|
||||
|
||||
/* Write `filler` bytes of 0xff directly into s, followed by the RBSP
 * trailing bits, and flush the bitstream.  A non-positive `filler` writes
 * no 0xff bytes.  h is not read by this function. */
void x264_filler_write( x264_t *h, bs_t *s, int filler )
{
    bs_realign( s );

    while( filler-- > 0 )
        bs_write( s, 8, 0xff );

    bs_rbsp_trailing( s );
    bs_flush( s );
}
|
||||
|
||||
/* Build a decoded reference picture marking repetition SEI payload from the
 * backed-up slice header (h->sh_backup) and pass it to x264_sei_write().
 *
 * We currently only use this for repeating B-refs, as required by Blu-ray. */
void x264_sei_dec_ref_pic_marking_write( x264_t *h, bs_t *s )
{
    x264_slice_header_t *sh = &h->sh_backup;
    const int mmco_count = sh->i_mmco_command_count;
    const int b_has_mmco = mmco_count > 0;
    ALIGNED_4( uint8_t payload[100] );
    bs_t pbs;

    M32( payload ) = 0; /* initialise the first word to keep gcc quiet */
    bs_init( &pbs, payload, sizeof(payload) );
    bs_realign( &pbs );

    bs_write1( &pbs, 0 );                 /* original_idr_flag */
    bs_write_ue( &pbs, sh->i_frame_num ); /* original_frame_num */
    if( !h->sps->b_frame_mbs_only )
        bs_write1( &pbs, 0 );             /* original_field_pic_flag */

    /* One flag announces whether any marking commands follow. */
    bs_write1( &pbs, b_has_mmco );
    if( b_has_mmco )
    {
        for( int cmd = 0; cmd < mmco_count; cmd++ )
        {
            /* Every command is written as operation 1 followed by its
             * difference_of_pic_nums minus one. */
            bs_write_ue( &pbs, 1 );
            bs_write_ue( &pbs, sh->mmco[cmd].i_difference_of_pic_nums - 1 );
        }
        bs_write_ue( &pbs, 0 ); /* terminates the command list */
    }

    bs_align_10( &pbs );

    int payload_bytes = bs_pos( &pbs ) / 8;
    x264_sei_write( s, payload, payload_bytes, SEI_DEC_REF_PIC_MARKING );
}
|
||||
|
||||
/* Write the AVC-Intra "UMID" user-data SEI: a 497-byte payload made of the
 * AVC-Intra UUID, the ASCII tag "UMID" and a handful of fixed marker bytes,
 * with every other byte set to 0xff.
 *
 * Note that the payload is written to h->out.bs; the `s` argument is not
 * used.  Always returns 0. */
int x264_sei_avcintra_umid_write( x264_t *h, bs_t *s )
{
    enum { PAYLOAD_LEN = 497 };
    /* Four groups share one layout: a marker byte at `base`, then zeros at
     * base+2, base+3, base+5 and base+6.
     * The zeroed bytes appear to be some sort of frame/seconds counter in
     * certain applications, but others jump around, so leave them as zero
     * for now. */
    static const uint8_t groups[4][2] =
    {
        { 20, 0x13 }, { 28, 0x14 }, { 60, 0x62 }, { 68, 0x63 }
    };
    static const uint8_t zero_offsets[4] = { 2, 3, 5, 6 };
    static const char tag[] = "UMID";
    uint8_t data[512];

    memset( data, 0xff, PAYLOAD_LEN );
    memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
    memcpy( data+16, tag, sizeof(tag)-1 ); /* tag text without its terminator */

    for( int g = 0; g < 4; g++ )
    {
        int base = groups[g][0];
        data[base] = groups[g][1];
        for( int z = 0; z < 4; z++ )
            data[base + zero_offsets[z]] = 0;
    }

    data[36] = 0x60;
    data[41] = 0x22; /* Believed to be some sort of end of basic UMID identifier */

    x264_sei_write( &h->out.bs, data, PAYLOAD_LEN, SEI_USER_DATA_UNREGISTERED );

    return 0;
}
|
||||
|
||||
/* Write the AVC-Intra "VANC" user-data SEI: `len` bytes consisting of the
 * AVC-Intra UUID, the ASCII tag "VANC" and 0xff padding.
 *
 * Returns -1 (after logging an error) when len is negative or larger than
 * the 6000-byte local buffer, 0 otherwise.  Note that the payload is written
 * to h->out.bs; the `s` argument is not used. */
int x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len )
{
    static const char tag[] = "VANC";
    uint8_t data[6000];

    int b_len_ok = len >= 0 && (size_t)len <= sizeof(data);
    if( !b_len_ok )
    {
        x264_log( h, X264_LOG_ERROR, "AVC-Intra SEI is too large (%d)\n", len );
        return -1;
    }

    memset( data, 0xff, len );
    memcpy( data, avcintra_uuid, sizeof(avcintra_uuid) );
    memcpy( data+16, tag, sizeof(tag)-1 ); /* tag text without its terminator */

    x264_sei_write( &h->out.bs, data, len, SEI_USER_DATA_UNREGISTERED );

    return 0;
}
|
||||
|
||||
#undef ERROR
|
||||
#define ERROR(...)\
|
||||
{\
|
||||
if( verbose )\
|
||||
x264_log( h, X264_LOG_WARNING, __VA_ARGS__ );\
|
||||
ret = 1;\
|
||||
}
|
||||
|
||||
int x264_validate_levels( x264_t *h, int verbose )
|
||||
{
|
||||
int ret = 0;
|
||||
int mbs = h->sps->i_mb_width * h->sps->i_mb_height;
|
||||
int dpb = mbs * h->sps->vui.i_max_dec_frame_buffering;
|
||||
int cbp_factor = h->sps->i_profile_idc>=PROFILE_HIGH422 ? 16 :
|
||||
h->sps->i_profile_idc==PROFILE_HIGH10 ? 12 :
|
||||
h->sps->i_profile_idc==PROFILE_HIGH ? 5 : 4;
|
||||
|
||||
const x264_level_t *l = x264_levels;
|
||||
while( l->level_idc != 0 && l->level_idc != h->param.i_level_idc )
|
||||
l++;
|
||||
|
||||
if( l->frame_size < mbs
|
||||
|| l->frame_size*8 < h->sps->i_mb_width * h->sps->i_mb_width
|
||||
|| l->frame_size*8 < h->sps->i_mb_height * h->sps->i_mb_height )
|
||||
ERROR( "frame MB size (%dx%d) > level limit (%d)\n",
|
||||
h->sps->i_mb_width, h->sps->i_mb_height, l->frame_size );
|
||||
if( dpb > l->dpb )
|
||||
ERROR( "DPB size (%d frames, %d mbs) > level limit (%d frames, %d mbs)\n",
|
||||
h->sps->vui.i_max_dec_frame_buffering, dpb, l->dpb / mbs, l->dpb );
|
||||
|
||||
#define CHECK( name, limit, val ) \
|
||||
if( (val) > (limit) ) \
|
||||
ERROR( name " (%"PRId64") > level limit (%d)\n", (int64_t)(val), (limit) );
|
||||
|
||||
CHECK( "VBV bitrate", (l->bitrate * cbp_factor) / 4, h->param.rc.i_vbv_max_bitrate );
|
||||
CHECK( "VBV buffer", (l->cpb * cbp_factor) / 4, h->param.rc.i_vbv_buffer_size );
|
||||
CHECK( "MV range", l->mv_range, h->param.analyse.i_mv_range );
|
||||
CHECK( "interlaced", !l->frame_only, h->param.b_interlaced );
|
||||
CHECK( "fake interlaced", !l->frame_only, h->param.b_fake_interlaced );
|
||||
|
||||
if( h->param.i_fps_den > 0 )
|
||||
CHECK( "MB rate", l->mbps, (int64_t)mbs * h->param.i_fps_num / h->param.i_fps_den );
|
||||
|
||||
/* TODO check the rest of the limits */
|
||||
return ret;
|
||||
}
|
||||
71
encoder/set.h
Normal file
71
encoder/set.h
Normal file
@@ -0,0 +1,71 @@
|
||||
/*****************************************************************************
|
||||
* set.h: header writing
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2003-2025 x264 project
|
||||
*
|
||||
* Authors: Laurent Aimar <fenrir@via.ecp.fr>
|
||||
* Loren Merritt <lorenm@u.washington.edu>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X264_ENCODER_SET_H
|
||||
#define X264_ENCODER_SET_H
|
||||
|
||||
#define x264_sps_init x264_template(sps_init)
|
||||
void x264_sps_init( x264_sps_t *sps, int i_id, x264_param_t *param );
|
||||
#define x264_sps_init_reconfigurable x264_template(sps_init_reconfigurable)
|
||||
void x264_sps_init_reconfigurable( x264_sps_t *sps, x264_param_t *param );
|
||||
#define x264_sps_init_scaling_list x264_template(sps_init_scaling_list)
|
||||
void x264_sps_init_scaling_list( x264_sps_t *sps, x264_param_t *param );
|
||||
#define x264_sps_write x264_template(sps_write)
|
||||
void x264_sps_write( bs_t *s, x264_sps_t *sps );
|
||||
#define x264_pps_init x264_template(pps_init)
|
||||
void x264_pps_init( x264_pps_t *pps, int i_id, x264_param_t *param, x264_sps_t *sps );
|
||||
#define x264_pps_write x264_template(pps_write)
|
||||
void x264_pps_write( bs_t *s, x264_sps_t *sps, x264_pps_t *pps );
|
||||
#define x264_sei_recovery_point_write x264_template(sei_recovery_point_write)
|
||||
void x264_sei_recovery_point_write( x264_t *h, bs_t *s, int recovery_frame_cnt );
|
||||
#define x264_sei_version_write x264_template(sei_version_write)
|
||||
int x264_sei_version_write( x264_t *h, bs_t *s );
|
||||
#define x264_validate_levels x264_template(validate_levels)
|
||||
int x264_validate_levels( x264_t *h, int verbose );
|
||||
#define x264_sei_buffering_period_write x264_template(sei_buffering_period_write)
|
||||
void x264_sei_buffering_period_write( x264_t *h, bs_t *s );
|
||||
#define x264_sei_pic_timing_write x264_template(sei_pic_timing_write)
|
||||
void x264_sei_pic_timing_write( x264_t *h, bs_t *s );
|
||||
#define x264_sei_dec_ref_pic_marking_write x264_template(sei_dec_ref_pic_marking_write)
|
||||
void x264_sei_dec_ref_pic_marking_write( x264_t *h, bs_t *s );
|
||||
#define x264_sei_frame_packing_write x264_template(sei_frame_packing_write)
|
||||
void x264_sei_frame_packing_write( x264_t *h, bs_t *s );
|
||||
#define x264_sei_mastering_display_write x264_template(sei_mastering_display_write)
|
||||
void x264_sei_mastering_display_write( x264_t *h, bs_t *s );
|
||||
#define x264_sei_content_light_level_write x264_template(sei_content_light_level_write)
|
||||
void x264_sei_content_light_level_write( x264_t *h, bs_t *s );
|
||||
#define x264_sei_alternative_transfer_write x264_template(sei_alternative_transfer_write)
|
||||
void x264_sei_alternative_transfer_write( x264_t *h, bs_t *s );
|
||||
#define x264_sei_avcintra_umid_write x264_template(sei_avcintra_umid_write)
|
||||
int x264_sei_avcintra_umid_write( x264_t *h, bs_t *s );
|
||||
#define x264_sei_avcintra_vanc_write x264_template(sei_avcintra_vanc_write)
|
||||
int x264_sei_avcintra_vanc_write( x264_t *h, bs_t *s, int len );
|
||||
#define x264_sei_write x264_template(sei_write)
|
||||
void x264_sei_write( bs_t *s, uint8_t *payload, int payload_size, int payload_type );
|
||||
#define x264_filler_write x264_template(filler_write)
|
||||
void x264_filler_write( x264_t *h, bs_t *s, int filler );
|
||||
|
||||
#endif
|
||||
782
encoder/slicetype-cl.c
Normal file
782
encoder/slicetype-cl.c
Normal file
@@ -0,0 +1,782 @@
|
||||
/*****************************************************************************
|
||||
* slicetype-cl.c: OpenCL slicetype decision code (lowres lookahead)
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2012-2025 x264 project
|
||||
*
|
||||
* Authors: Steve Borho <sborho@multicorewareinc.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common/common.h"
|
||||
#include "macroblock.h"
|
||||
#include "me.h"
|
||||
#include "slicetype-cl.h"
|
||||
|
||||
#if HAVE_OPENCL
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#define x264_weights_analyse x264_template(weights_analyse)
|
||||
void x264_weights_analyse( x264_t *h, x264_frame_t *fenc, x264_frame_t *ref, int b_lookahead );
|
||||
|
||||
/* We define CL_QUEUE_THREAD_HANDLE_AMD here because it is not defined
|
||||
* in the OpenCL headers shipped with NVIDIA drivers. We need to be
|
||||
* able to compile on an NVIDIA machine and run optimally on an AMD GPU. */
|
||||
#define CL_QUEUE_THREAD_HANDLE_AMD 0x403E
|
||||
|
||||
#define OCLCHECK( method, ... )\
|
||||
do\
|
||||
{\
|
||||
if( h->opencl.b_fatal_error )\
|
||||
return -1;\
|
||||
status = ocl->method( __VA_ARGS__ );\
|
||||
if( status != CL_SUCCESS ) {\
|
||||
h->param.b_opencl = 0;\
|
||||
h->opencl.b_fatal_error = 1;\
|
||||
x264_log( h, X264_LOG_ERROR, # method " error '%d'\n", status );\
|
||||
return -1;\
|
||||
}\
|
||||
} while( 0 )
|
||||
|
||||
/* Wait for the OpenCL queue to finish, then complete every pending
 * device-to-host copy and reset the page-locked staging buffer. */
void x264_opencl_flush( x264_t *h )
{
    h->opencl.ocl->clFinish( h->opencl.queue );

    /* Finish copies from the GPU by copying from the page-locked buffer to
     * their final destination */
    const int pending = h->opencl.num_copies;
    for( int c = 0; c < pending; c++ )
        memcpy( h->opencl.copies[c].dest, h->opencl.copies[c].src, h->opencl.copies[c].bytes );

    /* Both the copy list and the staging buffer are empty again. */
    h->opencl.num_copies = 0;
    h->opencl.pl_occupancy = 0;
}
|
||||
|
||||
/* Reserve `bytes` bytes from the page-locked staging buffer and return a
 * pointer to them.
 *
 * When the request would reach the end of the buffer, everything queued so
 * far is flushed first, which rewinds the buffer to its start.  A single
 * request must be smaller than PAGE_LOCKED_BUF_SIZE (asserted). */
static void *opencl_alloc_locked( x264_t *h, int bytes )
{
    int b_needs_flush = h->opencl.pl_occupancy + bytes >= PAGE_LOCKED_BUF_SIZE;
    if( b_needs_flush )
        x264_opencl_flush( h );
    assert( bytes < PAGE_LOCKED_BUF_SIZE );

    char *region = h->opencl.page_locked_ptr + h->opencl.pl_occupancy;
    h->opencl.pl_occupancy += bytes;
    return region;
}
|
||||
|
||||
/* Prepare a frame for the OpenCL lookahead: allocate device buffers on first
 * use, upload the luma plane, build the downscaled image chain, run the
 * intra cost and row-sum kernels, and queue the read-back of their results.
 *
 * The work is done once per frame (guarded by fenc->b_intra_calculated).
 * Returns 0 on success or when the frame was already processed, and -1 when
 * any OpenCL call fails. */
int x264_opencl_lowres_init( x264_t *h, x264_frame_t *fenc, int lambda )
{
    if( fenc->b_intra_calculated )
        return 0;
    fenc->b_intra_calculated = 1;

    x264_opencl_function_t *ocl = h->opencl.ocl;
    const int luma_length = fenc->i_stride[0] * fenc->i_lines[0];
    const int mb_count = h->mb.i_mb_count;
    /* Index of the double-buffered shared resources used for this frame;
     * toggled at the very end of the function. */
    const int cur = h->opencl.last_buf;
    cl_int status;

    /* Allocation helpers: on failure they disable OpenCL, log and return -1. */
#define CREATEBUF( out, flags, size )\
    out = ocl->clCreateBuffer( h->opencl.context, (flags), (size), NULL, &status );\
    if( status != CL_SUCCESS ) { h->param.b_opencl = 0; x264_log( h, X264_LOG_ERROR, "clCreateBuffer error '%d'\n", status ); return -1; }
#define CREATEIMAGE( out, flags, pf, width, height )\
    out = ocl->clCreateImage2D( h->opencl.context, (flags), &pf, width, height, 0, NULL, &status );\
    if( status != CL_SUCCESS ) { h->param.b_opencl = 0; x264_log( h, X264_LOG_ERROR, "clCreateImage2D error '%d'\n", status ); return -1; }

    if( !h->opencl.lowres_mv_costs )
    {
        /* Allocate shared memory buffers */
        int img_w = h->mb.i_mb_width * 8 * SIZEOF_PIXEL;
        int img_h = h->mb.i_mb_height * 8 * SIZEOF_PIXEL;

        cl_image_format fmt;
        fmt.image_channel_order = CL_R;
        fmt.image_channel_data_type = CL_UNSIGNED_INT32;
        CREATEIMAGE( h->opencl.weighted_luma_hpel, CL_MEM_READ_WRITE, fmt, img_w, img_h );

        /* The scaled images all share one format; each scale halves both
         * dimensions of the previous one. */
        fmt.image_channel_order = CL_RGBA;
        fmt.image_channel_data_type = CL_UNSIGNED_INT8;
        for( int scale = 0; scale < NUM_IMAGE_SCALES; scale++ )
        {
            CREATEIMAGE( h->opencl.weighted_scaled_images[scale], CL_MEM_READ_WRITE, fmt, img_w, img_h );
            img_w >>= 1;
            img_h >>= 1;
        }

        CREATEBUF( h->opencl.lowres_mv_costs,     CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) );
        CREATEBUF( h->opencl.lowres_costs[0],     CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) );
        CREATEBUF( h->opencl.lowres_costs[1],     CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) );
        CREATEBUF( h->opencl.mv_buffers[0],       CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) * 2 );
        CREATEBUF( h->opencl.mv_buffers[1],       CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) * 2 );
        CREATEBUF( h->opencl.mvp_buffer,          CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) * 2 );
        CREATEBUF( h->opencl.frame_stats[0],      CL_MEM_WRITE_ONLY, 4 * sizeof(int) );
        CREATEBUF( h->opencl.frame_stats[1],      CL_MEM_WRITE_ONLY, 4 * sizeof(int) );
        CREATEBUF( h->opencl.row_satds[0],        CL_MEM_WRITE_ONLY, h->mb.i_mb_height * sizeof(int) );
        CREATEBUF( h->opencl.row_satds[1],        CL_MEM_WRITE_ONLY, h->mb.i_mb_height * sizeof(int) );
        CREATEBUF( h->opencl.luma_16x16_image[0], CL_MEM_READ_ONLY,  luma_length );
        CREATEBUF( h->opencl.luma_16x16_image[1], CL_MEM_READ_ONLY,  luma_length );
    }

    if( !fenc->opencl.intra_cost )
    {
        /* Allocate per-frame buffers */
        int img_w = h->mb.i_mb_width * 8 * SIZEOF_PIXEL;
        int img_h = h->mb.i_mb_height * 8 * SIZEOF_PIXEL;

        cl_image_format fmt;
        fmt.image_channel_order = CL_R;
        fmt.image_channel_data_type = CL_UNSIGNED_INT32;
        CREATEIMAGE( fenc->opencl.luma_hpel, CL_MEM_READ_WRITE, fmt, img_w, img_h );

        fmt.image_channel_order = CL_RGBA;
        fmt.image_channel_data_type = CL_UNSIGNED_INT8;
        for( int scale = 0; scale < NUM_IMAGE_SCALES; scale++ )
        {
            CREATEIMAGE( fenc->opencl.scaled_image2Ds[scale], CL_MEM_READ_WRITE, fmt, img_w, img_h );
            img_w >>= 1;
            img_h >>= 1;
        }

        CREATEBUF( fenc->opencl.inv_qscale_factor, CL_MEM_READ_ONLY,  mb_count * sizeof(int16_t) );
        CREATEBUF( fenc->opencl.intra_cost,        CL_MEM_WRITE_ONLY, mb_count * sizeof(int16_t) );
        CREATEBUF( fenc->opencl.lowres_mvs0,       CL_MEM_READ_WRITE, mb_count * 2 * sizeof(int16_t) * (h->param.i_bframe + 1) );
        CREATEBUF( fenc->opencl.lowres_mvs1,       CL_MEM_READ_WRITE, mb_count * 2 * sizeof(int16_t) * (h->param.i_bframe + 1) );
        CREATEBUF( fenc->opencl.lowres_mv_costs0,  CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) * (h->param.i_bframe + 1) );
        CREATEBUF( fenc->opencl.lowres_mv_costs1,  CL_MEM_READ_WRITE, mb_count * sizeof(int16_t) * (h->param.i_bframe + 1) );
    }
#undef CREATEBUF
#undef CREATEIMAGE

    /* Copy image to the GPU, downscale to unpadded 8x8, then continue for all scales */

    char *staging = opencl_alloc_locked( h, luma_length );
    memcpy( staging, fenc->plane[0], luma_length );
    OCLCHECK( clEnqueueWriteBuffer, h->opencl.queue, h->opencl.luma_16x16_image[cur], CL_FALSE, 0, luma_length, staging, 0, NULL, NULL );

    size_t global_size[2];
    if( h->param.rc.i_aq_mode && fenc->i_inv_qscale_factor )
    {
        /* Upload the frame's own inverse qscale factors. */
        int qscale_bytes = mb_count * sizeof(int16_t);
        staging = opencl_alloc_locked( h, qscale_bytes );
        memcpy( staging, fenc->i_inv_qscale_factor, qscale_bytes );
        OCLCHECK( clEnqueueWriteBuffer, h->opencl.queue, fenc->opencl.inv_qscale_factor, CL_FALSE, 0, qscale_bytes, staging, 0, NULL, NULL );
    }
    else
    {
        /* Fill fenc->opencl.inv_qscale_factor with NOP (256) */
        cl_uint fill_arg = 0;
        int16_t nop_factor = 256;
        OCLCHECK( clSetKernelArg, h->opencl.memset_kernel, fill_arg++, sizeof(cl_mem), &fenc->opencl.inv_qscale_factor );
        OCLCHECK( clSetKernelArg, h->opencl.memset_kernel, fill_arg++, sizeof(int16_t), &nop_factor );
        global_size[0] = mb_count;
        OCLCHECK( clEnqueueNDRangeKernel, h->opencl.queue, h->opencl.memset_kernel, 1, NULL, global_size, NULL, 0, NULL, NULL );
    }

    /* First downscale pass: fills scale 0 and the hpel image from the
     * uploaded luma plane. */
    int stride = fenc->i_stride[0];
    cl_uint arg = 0;
    OCLCHECK( clSetKernelArg, h->opencl.downscale_hpel_kernel, arg++, sizeof(cl_mem), &h->opencl.luma_16x16_image[cur] );
    OCLCHECK( clSetKernelArg, h->opencl.downscale_hpel_kernel, arg++, sizeof(cl_mem), &fenc->opencl.scaled_image2Ds[0] );
    OCLCHECK( clSetKernelArg, h->opencl.downscale_hpel_kernel, arg++, sizeof(cl_mem), &fenc->opencl.luma_hpel );
    OCLCHECK( clSetKernelArg, h->opencl.downscale_hpel_kernel, arg++, sizeof(int), &stride );
    global_size[0] = 8 * h->mb.i_mb_width;
    global_size[1] = 8 * h->mb.i_mb_height;
    OCLCHECK( clEnqueueNDRangeKernel, h->opencl.queue, h->opencl.downscale_hpel_kernel, 2, NULL, global_size, NULL, 0, NULL, NULL );

    /* Remaining scales: each one is derived from the previous, at half the
     * launch size, until a dimension drops below 16. */
    for( int scale = 0; scale < NUM_IMAGE_SCALES - 1; scale++ )
    {
        /* Workaround for AMD Southern Island:
         *
         * Alternate kernel instances.  No perf impact to this, so we do it for
         * all GPUs.  It prevents the same kernel from being enqueued
         * back-to-back, avoiding a dependency calculation bug in the driver.
         */
        cl_kernel kern = scale & 1 ? h->opencl.downscale_kernel1 : h->opencl.downscale_kernel2;

        arg = 0;
        OCLCHECK( clSetKernelArg, kern, arg++, sizeof(cl_mem), &fenc->opencl.scaled_image2Ds[scale] );
        OCLCHECK( clSetKernelArg, kern, arg++, sizeof(cl_mem), &fenc->opencl.scaled_image2Ds[scale+1] );
        global_size[0] >>= 1;
        global_size[1] >>= 1;
        if( global_size[0] < 16 || global_size[1] < 16 )
            break;
        OCLCHECK( clEnqueueNDRangeKernel, h->opencl.queue, kern, 2, NULL, global_size, NULL, 0, NULL, NULL );
    }

    /* Intra cost kernel: 32x8 work-groups, with the X size rounded up to a
     * multiple of 32. */
    size_t local_size[2];
    global_size[0] = ((h->mb.i_mb_width + 31)>>5)<<5;
    global_size[1] = 8*h->mb.i_mb_height;
    local_size[0] = 32;
    local_size[1] = 8;
    arg = 0;

    /* For presets slow, slower, and placebo, check all 10 intra modes that the
     * C lookahead supports.  For faster presets, only check the most frequent 8
     * modes
     */
    int slow = h->param.analyse.i_subpel_refine > 7;
    OCLCHECK( clSetKernelArg, h->opencl.intra_kernel, arg++, sizeof(cl_mem), &fenc->opencl.scaled_image2Ds[0] );
    OCLCHECK( clSetKernelArg, h->opencl.intra_kernel, arg++, sizeof(cl_mem), &fenc->opencl.intra_cost );
    OCLCHECK( clSetKernelArg, h->opencl.intra_kernel, arg++, sizeof(cl_mem), &h->opencl.frame_stats[cur] );
    OCLCHECK( clSetKernelArg, h->opencl.intra_kernel, arg++, sizeof(int), &lambda );
    OCLCHECK( clSetKernelArg, h->opencl.intra_kernel, arg++, sizeof(int), &h->mb.i_mb_width );
    OCLCHECK( clSetKernelArg, h->opencl.intra_kernel, arg++, sizeof(int), &slow );
    OCLCHECK( clEnqueueNDRangeKernel, h->opencl.queue, h->opencl.intra_kernel, 2, NULL, global_size, local_size, 0, NULL, NULL );

    /* Row-sum kernel: one 256-wide work-group per macroblock row. */
    global_size[0] = 256;
    global_size[1] = h->mb.i_mb_height;
    local_size[0] = 256;
    local_size[1] = 1;
    arg = 0;
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_intra_kernel, arg++, sizeof(cl_mem), &fenc->opencl.intra_cost );
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_intra_kernel, arg++, sizeof(cl_mem), &fenc->opencl.inv_qscale_factor );
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_intra_kernel, arg++, sizeof(cl_mem), &h->opencl.row_satds[cur] );
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_intra_kernel, arg++, sizeof(cl_mem), &h->opencl.frame_stats[cur] );
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_intra_kernel, arg++, sizeof(int), &h->mb.i_mb_width );
    OCLCHECK( clEnqueueNDRangeKernel, h->opencl.queue, h->opencl.rowsum_intra_kernel, 2, NULL, global_size, local_size, 0, NULL, NULL );

    /* Four copy records are added below; make room for them first. */
    if( h->opencl.num_copies >= MAX_FINISH_COPIES - 4 )
        x264_opencl_flush( h );

    int slot;

    /* Per-macroblock intra costs */
    int read_bytes = mb_count * sizeof(int16_t);
    staging = opencl_alloc_locked( h, read_bytes );
    OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, fenc->opencl.intra_cost, CL_FALSE, 0, read_bytes, staging, 0, NULL, NULL );
    slot = h->opencl.num_copies++;
    h->opencl.copies[slot].dest = fenc->lowres_costs[0][0];
    h->opencl.copies[slot].src = staging;
    h->opencl.copies[slot].bytes = read_bytes;

    /* Per-row SATD sums */
    read_bytes = h->mb.i_mb_height * sizeof(int);
    staging = opencl_alloc_locked( h, read_bytes );
    OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, h->opencl.row_satds[cur], CL_FALSE, 0, read_bytes, staging, 0, NULL, NULL );
    slot = h->opencl.num_copies++;
    h->opencl.copies[slot].dest = fenc->i_row_satds[0][0];
    h->opencl.copies[slot].src = staging;
    h->opencl.copies[slot].bytes = read_bytes;

    /* Frame statistics: four ints are read back, of which the first goes to
     * i_cost_est and the second to i_cost_est_aq. */
    read_bytes = sizeof(int) * 4;
    staging = opencl_alloc_locked( h, read_bytes );
    OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, h->opencl.frame_stats[cur], CL_FALSE, 0, read_bytes, staging, 0, NULL, NULL );
    slot = h->opencl.num_copies++;
    h->opencl.copies[slot].dest = &fenc->i_cost_est[0][0];
    h->opencl.copies[slot].src = staging;
    h->opencl.copies[slot].bytes = sizeof(int);
    slot = h->opencl.num_copies++;
    h->opencl.copies[slot].dest = &fenc->i_cost_est_aq[0][0];
    h->opencl.copies[slot].src = staging + sizeof(int);
    h->opencl.copies[slot].bytes = sizeof(int);

    /* Switch to the other set of shared buffers for the next frame. */
    h->opencl.last_buf = !h->opencl.last_buf;
    return 0;
}
|
||||
|
||||
/* This function was tested empirically on a number of AMD and NV GPUs. Making a
|
||||
* function which returns perfect launch dimensions is impossible; some
|
||||
* applications will have self-tuning code to try many possible variables and
|
||||
* measure the runtime. Here we simply make an educated guess based on what we
|
||||
* know GPUs typically prefer. */
|
||||
static void optimal_launch_dims( x264_t *h, size_t *gdims, size_t *ldims, const cl_kernel kernel, const cl_device_id device )
|
||||
{
|
||||
x264_opencl_function_t *ocl = h->opencl.ocl;
|
||||
size_t max_work_group = 256; /* reasonable defaults for OpenCL 1.0 devices, below APIs may fail */
|
||||
size_t preferred_multiple = 64;
|
||||
cl_uint num_cus = 6;
|
||||
|
||||
ocl->clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_work_group, NULL );
|
||||
ocl->clGetKernelWorkGroupInfo( kernel, device, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &preferred_multiple, NULL );
|
||||
ocl->clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &num_cus, NULL );
|
||||
|
||||
ldims[0] = preferred_multiple;
|
||||
ldims[1] = 8;
|
||||
|
||||
/* make ldims[1] an even divisor of gdims[1] */
|
||||
while( gdims[1] & (ldims[1] - 1) )
|
||||
{
|
||||
ldims[0] <<= 1;
|
||||
ldims[1] >>= 1;
|
||||
}
|
||||
/* make total ldims fit under the max work-group dimensions for the device */
|
||||
while( ldims[0] * ldims[1] > max_work_group )
|
||||
{
|
||||
if( (ldims[0] <= preferred_multiple) && (ldims[1] > 1) )
|
||||
ldims[1] >>= 1;
|
||||
else
|
||||
ldims[0] >>= 1;
|
||||
}
|
||||
|
||||
if( ldims[0] > gdims[0] )
|
||||
{
|
||||
/* remove preferred multiples until we're close to gdims[0] */
|
||||
while( gdims[0] + preferred_multiple < ldims[0] )
|
||||
ldims[0] -= preferred_multiple;
|
||||
gdims[0] = ldims[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
/* make gdims an even multiple of ldims */
|
||||
gdims[0] = (gdims[0]+ldims[0]-1)/ldims[0];
|
||||
gdims[0] *= ldims[0];
|
||||
}
|
||||
|
||||
/* make ldims smaller to spread work across compute units */
|
||||
while( (gdims[0]/ldims[0]) * (gdims[1]/ldims[1]) * 2 <= num_cus )
|
||||
{
|
||||
if( ldims[0] > preferred_multiple )
|
||||
ldims[0] >>= 1;
|
||||
else if( ldims[1] > 1 )
|
||||
ldims[1] >>= 1;
|
||||
else
|
||||
break;
|
||||
}
|
||||
/* for smaller GPUs, try not to abuse their texture cache */
|
||||
if( num_cus == 6 && ldims[0] == 64 && ldims[1] == 4 )
|
||||
ldims[0] = 32;
|
||||
}
|
||||
|
||||
/* Motion-search frames[b] against frames[ref] on the OpenCL device.
 *
 *  1. If w carries a weight function, weighted copies of the reference's
 *     scaled images and hpel plane are produced and searched instead of the
 *     unweighted ones.
 *  2. The HME kernel is launched for every image scale, coarsest first,
 *     skipping scales that are less than 2 MBs wide or tall.
 *  3. The subpel refinement kernel is launched with the launch dimensions and
 *     local-memory sizes left over from the last scale processed in step 2.
 *  4. A non-blocking read of the refined MVs into a buffer obtained from
 *     opencl_alloc_locked() is enqueued, and the final copy into
 *     fenc->lowres_mvs[][] is recorded in h->opencl.copies[].
 *
 * b_islist1 selects the list1 outputs (ref - b is the distance) rather than
 * the list0 outputs (b - ref is the distance).
 *
 * Returns 0 when everything was enqueued.  OCLCHECK is defined outside this
 * block; NOTE(review): it is expected to leave the function early on an
 * OpenCL error, which is why every call is routed through it. */
int x264_opencl_motionsearch( x264_t *h, x264_frame_t **frames, int b, int ref, int b_islist1, int lambda, const x264_weight_t *w )
{
    /* not referenced directly below; presumably consumed by name by OCLCHECK */
    x264_opencl_function_t *ocl = h->opencl.ocl;
    cl_int status;

    x264_frame_t *fenc = frames[b];
    x264_frame_t *fref = frames[ref];
    const int b_weighted = w && w->weightfn;

    cl_mem ref_scaled_images[NUM_IMAGE_SCALES];
    cl_mem ref_luma_hpel;

    if( b_weighted )
    {
        /* WeightP: filter fref->opencl.scaled_image2Ds[] and fref->opencl.luma_hpel
         * into the weighted_* images owned by h->opencl */
        size_t wdims[2] = { 8 * h->mb.i_mb_width, 8 * h->mb.i_mb_height };
        cl_uint idx;

        for( int i = 0; i < NUM_IMAGE_SCALES; i++ )
        {
            idx = 0;
            OCLCHECK( clSetKernelArg, h->opencl.weightp_scaled_images_kernel, idx++, sizeof(cl_mem), &fref->opencl.scaled_image2Ds[i] );
            OCLCHECK( clSetKernelArg, h->opencl.weightp_scaled_images_kernel, idx++, sizeof(cl_mem), &h->opencl.weighted_scaled_images[i] );
            OCLCHECK( clSetKernelArg, h->opencl.weightp_scaled_images_kernel, idx++, sizeof(int32_t), &w->i_offset );
            OCLCHECK( clSetKernelArg, h->opencl.weightp_scaled_images_kernel, idx++, sizeof(int32_t), &w->i_scale );
            OCLCHECK( clSetKernelArg, h->opencl.weightp_scaled_images_kernel, idx++, sizeof(int32_t), &w->i_denom );
            OCLCHECK( clEnqueueNDRangeKernel, h->opencl.queue, h->opencl.weightp_scaled_images_kernel, 2, NULL, wdims, NULL, 0, NULL, NULL );

            /* each further scale has half the dimensions; stop once it gets too small */
            wdims[0] >>= 1;
            wdims[1] >>= 1;
            if( wdims[0] < 16 || wdims[1] < 16 )
                break;
        }

        /* the hpel plane is filtered at the full (scale 0) dimensions */
        wdims[0] = 8 * h->mb.i_mb_width;
        wdims[1] = 8 * h->mb.i_mb_height;

        idx = 0;
        OCLCHECK( clSetKernelArg, h->opencl.weightp_hpel_kernel, idx++, sizeof(cl_mem), &fref->opencl.luma_hpel );
        OCLCHECK( clSetKernelArg, h->opencl.weightp_hpel_kernel, idx++, sizeof(cl_mem), &h->opencl.weighted_luma_hpel );
        OCLCHECK( clSetKernelArg, h->opencl.weightp_hpel_kernel, idx++, sizeof(int32_t), &w->i_offset );
        OCLCHECK( clSetKernelArg, h->opencl.weightp_hpel_kernel, idx++, sizeof(int32_t), &w->i_scale );
        OCLCHECK( clSetKernelArg, h->opencl.weightp_hpel_kernel, idx++, sizeof(int32_t), &w->i_denom );
        OCLCHECK( clEnqueueNDRangeKernel, h->opencl.queue, h->opencl.weightp_hpel_kernel, 2, NULL, wdims, NULL, 0, NULL, NULL );
    }

    /* pick the weighted or the plain reference planes for the search */
    for( int i = 0; i < NUM_IMAGE_SCALES; i++ )
        ref_scaled_images[i] = b_weighted ? h->opencl.weighted_scaled_images[i] : fref->opencl.scaled_image2Ds[i];
    ref_luma_hpel = b_weighted ? h->opencl.weighted_luma_hpel : fref->opencl.luma_hpel;

    const int num_iterations[NUM_IMAGE_SCALES] = { 1, 1, 2, 3 };
    int b_first_iteration = 1;
    int b_reverse_references = 1;
    int mv_idx = 1; /* which of the two h->opencl.mv_buffers[] goes in the first MV slot */

    /* these outlive the scale loop: the subpel kernel reuses whatever the
     * last processed scale left behind */
    int mb_per_group = 0;
    int cost_local_size = 0;
    int mvc_local_size = 0;
    int mb_width;
    size_t gdims[2];
    size_t ldims[2];

    /* scale 0 is 8x8 */
    for( int scale = NUM_IMAGE_SCALES-1; scale >= 0; scale-- )
    {
        mb_width = h->mb.i_mb_width >> scale;
        gdims[0] = mb_width;
        gdims[1] = h->mb.i_mb_height >> scale;
        if( gdims[0] < 2 || gdims[1] < 2 )
            continue;
        gdims[0] <<= 2; /* global width is four work-items per MB */
        optimal_launch_dims( h, gdims, ldims, h->opencl.hme_kernel, h->opencl.device );

        mb_per_group = (ldims[0] >> 2) * ldims[1];
        cost_local_size = 4 * mb_per_group * sizeof(int16_t);
        mvc_local_size = 4 * mb_per_group * sizeof(int16_t) * 2;
        int scaled_me_range = h->param.analyse.i_me_range >> scale;
        int b_shift_index = 1;

        cl_uint idx = 0;
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, sizeof(cl_mem), &fenc->opencl.scaled_image2Ds[scale] );
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, sizeof(cl_mem), &ref_scaled_images[scale] );
        const cl_uint idx_mv = idx; /* first of the two MV buffer slots, rebound every iteration */
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, sizeof(cl_mem), &h->opencl.mv_buffers[mv_idx] );
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, sizeof(cl_mem), &h->opencl.mv_buffers[!mv_idx] );
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, sizeof(cl_mem), &h->opencl.lowres_mv_costs );
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, sizeof(cl_mem), (void*)&h->opencl.mvp_buffer );
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, cost_local_size, NULL );
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, mvc_local_size, NULL );
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, sizeof(int), &mb_width );
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, sizeof(int), &lambda );
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, sizeof(int), &scaled_me_range );
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, sizeof(int), &scale );
        const cl_uint idx_flags = idx; /* first of the three trailing per-iteration flags */
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, sizeof(int), &b_shift_index );
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, sizeof(int), &b_first_iteration );
        OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx++, sizeof(int), &b_reverse_references );

        for( int iter = 0; iter < num_iterations[scale]; iter++ )
        {
            OCLCHECK( clEnqueueNDRangeKernel, h->opencl.queue, h->opencl.hme_kernel, 2, NULL, gdims, ldims, 0, NULL, NULL );

            b_shift_index = 0;
            b_first_iteration = 0;

            /* alternate top-left vs bot-right MB references at lower scales, so
             * motion field smooths more quickly. */
            b_reverse_references = scale > 2 ? b_reverse_references ^ 1 : 0;

            /* swap the two MV buffers and refresh everything that changed */
            mv_idx = !mv_idx;
            OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx_mv, sizeof(cl_mem), &h->opencl.mv_buffers[mv_idx] );
            OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx_mv + 1, sizeof(cl_mem), &h->opencl.mv_buffers[!mv_idx] );
            OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx_flags, sizeof(int), &b_shift_index );
            OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx_flags + 1, sizeof(int), &b_first_iteration );
            OCLCHECK( clSetKernelArg, h->opencl.hme_kernel, idx_flags + 2, sizeof(int), &b_reverse_references );
        }
    }

    /* subpel refinement writes into the per-frame list0 or list1 buffers */
    int satd_local_size = mb_per_group * sizeof(uint32_t) * 16;
    cl_mem *mvs_out = b_islist1 ? &fenc->opencl.lowres_mvs1 : &fenc->opencl.lowres_mvs0;
    cl_mem *mv_costs_out = b_islist1 ? &fenc->opencl.lowres_mv_costs1 : &fenc->opencl.lowres_mv_costs0;

    cl_uint idx = 0;
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, sizeof(cl_mem), &fenc->opencl.scaled_image2Ds[0] );
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, sizeof(cl_mem), &ref_luma_hpel );
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, sizeof(cl_mem), &h->opencl.mv_buffers[mv_idx] );
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, sizeof(cl_mem), &h->opencl.lowres_mv_costs );
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, cost_local_size, NULL );
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, satd_local_size, NULL );
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, mvc_local_size, NULL );
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, sizeof(cl_mem), mvs_out );
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, sizeof(cl_mem), mv_costs_out );
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, sizeof(int), &mb_width );
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, sizeof(int), &lambda );
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, sizeof(int), &b );
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, sizeof(int), &ref );
    OCLCHECK( clSetKernelArg, h->opencl.subpel_refine_kernel, idx++, sizeof(int), &b_islist1 );

    if( h->opencl.b_device_AMD_SI )
    {
        /* workaround for AMD Southern Island driver scheduling bug (fixed in
         * July 2012), perform meaningless small copy to add a data dependency */
        OCLCHECK( clEnqueueCopyBuffer, h->opencl.queue, h->opencl.mv_buffers[mv_idx], h->opencl.mv_buffers[!mv_idx], 0, 0, 20, 0, NULL, NULL );
    }

    OCLCHECK( clEnqueueNDRangeKernel, h->opencl.queue, h->opencl.subpel_refine_kernel, 2, NULL, gdims, ldims, 0, NULL, NULL );

    /* one MV (two int16_t) per macroblock */
    int mvlen = 2 * sizeof(int16_t) * h->mb.i_mb_count;

    /* make sure there is a free slot in the pending-copy table */
    if( h->opencl.num_copies >= MAX_FINISH_COPIES - 1 )
        x264_opencl_flush( h );

    char *locked = opencl_alloc_locked( h, mvlen );
    h->opencl.copies[h->opencl.num_copies].src = locked;
    h->opencl.copies[h->opencl.num_copies].bytes = mvlen;

    /* frame distance minus one indexes both the device buffer and lowres_mvs[][] */
    int dist = b_islist1 ? ref - b - 1 : b - ref - 1;
    int mvs_offset = mvlen * dist;
    OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, *mvs_out, CL_FALSE, mvs_offset, mvlen, locked, 0, NULL, NULL );
    h->opencl.copies[h->opencl.num_copies].dest = fenc->lowres_mvs[b_islist1 ? 1 : 0][dist];

    h->opencl.num_copies++;

    return 0;
}
|
||||
|
||||
/* Enqueue mode selection and row/frame cost summation for frames[b], predicted
 * from frames[p0] (list0) and frames[p1] (list1), then queue the readbacks.
 *
 * Up to five entries are appended to h->opencl.copies[]: the per-MB lowres
 * costs, the per-row SATDs, i_cost_est, i_cost_est_aq and, when b == p1 only,
 * i_intra_mbs.  h->opencl.last_buf is toggled once the frame-stats read has
 * been enqueued.
 *
 * Returns 0 when everything was enqueued.  OCLCHECK is defined outside this
 * block; NOTE(review): it is expected to leave the function early on an
 * OpenCL error. */
int x264_opencl_finalize_cost( x264_t *h, int lambda, x264_frame_t **frames, int p0, int p1, int b, int dist_scale_factor )
{
    /* not referenced directly below; presumably consumed by name by OCLCHECK */
    x264_opencl_function_t *ocl = h->opencl.ocl;
    cl_int status;

    x264_frame_t *fenc  = frames[b];
    x264_frame_t *fref0 = frames[p0];
    x264_frame_t *fref1 = frames[p1];

    /* distances to the two references; they index the per-frame result tables */
    const int d0 = b - p0;
    const int d1 = p1 - b;

    int bipred_weight = 32;
    if( h->param.analyse.b_weighted_bipred )
        bipred_weight = 64 - (dist_scale_factor >> 2);

    /* Tasks for the mode selection kernel:
     *  1. Select least cost mode (intra, ref0, ref1)
     *     list_used 0, 1, 2, or 3.  if B frame, do not allow intra
     *  2. if B frame, try bidir predictions.
     *  3. lowres_costs[i_mb_xy] = X264_MIN( bcost, LOWRES_COST_MASK ) + (list_used << LOWRES_COST_SHIFT); */
    size_t gdims[2] = { h->mb.i_mb_width, h->mb.i_mb_height };
    size_t bidir_ldims[2];
    size_t *mode_ldims = NULL; /* NULL: no explicit local size is passed to the enqueue */
    int cost_local_size = 4;
    int satd_local_size = 4;

    if( b < p1 )
    {
        /* For B frames, use 4 threads per MB for BIDIR checks */
        mode_ldims = bidir_ldims;
        gdims[0] <<= 2;
        optimal_launch_dims( h, gdims, mode_ldims, h->opencl.mode_select_kernel, h->opencl.device );

        int mb_per_group = (mode_ldims[0] >> 2) * mode_ldims[1];
        cost_local_size = 4 * mb_per_group * sizeof(int16_t);
        satd_local_size = 16 * mb_per_group * sizeof(uint32_t);
    }

    cl_uint idx = 0;
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(cl_mem), &fenc->opencl.scaled_image2Ds[0] );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(cl_mem), &fref0->opencl.luma_hpel );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(cl_mem), &fref1->opencl.luma_hpel );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(cl_mem), &fenc->opencl.lowres_mvs0 );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(cl_mem), &fenc->opencl.lowres_mvs1 );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(cl_mem), &fref1->opencl.lowres_mvs0 );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(cl_mem), &fenc->opencl.lowres_mv_costs0 );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(cl_mem), &fenc->opencl.lowres_mv_costs1 );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(cl_mem), &fenc->opencl.intra_cost );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(cl_mem), &h->opencl.lowres_costs[h->opencl.last_buf] );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(cl_mem), &h->opencl.frame_stats[h->opencl.last_buf] );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, cost_local_size, NULL );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, satd_local_size, NULL );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(int), &h->mb.i_mb_width );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(int), &bipred_weight );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(int), &dist_scale_factor );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(int), &b );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(int), &p0 );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(int), &p1 );
    OCLCHECK( clSetKernelArg, h->opencl.mode_select_kernel, idx++, sizeof(int), &lambda );
    OCLCHECK( clEnqueueNDRangeKernel, h->opencl.queue, h->opencl.mode_select_kernel, 2, NULL, gdims, mode_ldims, 0, NULL, NULL );

    /* Sum costs across rows, atomicAdd down frame: one 256-wide work-group per MB row */
    size_t row_gdims[2] = { 256, h->mb.i_mb_height };
    size_t row_ldims[2] = { 256, 1 };

    idx = 0;
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_inter_kernel, idx++, sizeof(cl_mem), &h->opencl.lowres_costs[h->opencl.last_buf] );
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_inter_kernel, idx++, sizeof(cl_mem), &fenc->opencl.inv_qscale_factor );
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_inter_kernel, idx++, sizeof(cl_mem), &h->opencl.row_satds[h->opencl.last_buf] );
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_inter_kernel, idx++, sizeof(cl_mem), &h->opencl.frame_stats[h->opencl.last_buf] );
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_inter_kernel, idx++, sizeof(int), &h->mb.i_mb_width );
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_inter_kernel, idx++, sizeof(int), &h->param.i_bframe_bias );
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_inter_kernel, idx++, sizeof(int), &b );
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_inter_kernel, idx++, sizeof(int), &p0 );
    OCLCHECK( clSetKernelArg, h->opencl.rowsum_inter_kernel, idx++, sizeof(int), &p1 );
    OCLCHECK( clEnqueueNDRangeKernel, h->opencl.queue, h->opencl.rowsum_inter_kernel, 2, NULL, row_gdims, row_ldims, 0, NULL, NULL );

    /* drain the pending-copy table first if the entries below might not fit */
    if( h->opencl.num_copies >= MAX_FINISH_COPIES - 4 )
        x264_opencl_flush( h );

    /* per-macroblock lowres costs */
    int bytes = h->mb.i_mb_count * sizeof(int16_t);
    char *locked = opencl_alloc_locked( h, bytes );
    h->opencl.copies[h->opencl.num_copies].src = locked;
    h->opencl.copies[h->opencl.num_copies].dest = fenc->lowres_costs[d0][d1];
    h->opencl.copies[h->opencl.num_copies].bytes = bytes;
    OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, h->opencl.lowres_costs[h->opencl.last_buf], CL_FALSE, 0, bytes, locked, 0, NULL, NULL );
    h->opencl.num_copies++;

    /* per-row SATD sums */
    bytes = h->mb.i_mb_height * sizeof(int);
    locked = opencl_alloc_locked( h, bytes );
    h->opencl.copies[h->opencl.num_copies].src = locked;
    h->opencl.copies[h->opencl.num_copies].dest = fenc->i_row_satds[d0][d1];
    h->opencl.copies[h->opencl.num_copies].bytes = bytes;
    OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, h->opencl.row_satds[h->opencl.last_buf], CL_FALSE, 0, bytes, locked, 0, NULL, NULL );
    h->opencl.num_copies++;

    /* frame statistics: four ints are read back in a single transfer */
    bytes = 4 * sizeof(int);
    locked = opencl_alloc_locked( h, bytes );
    OCLCHECK( clEnqueueReadBuffer, h->opencl.queue, h->opencl.frame_stats[h->opencl.last_buf], CL_FALSE, 0, bytes, locked, 0, NULL, NULL );
    h->opencl.last_buf = !h->opencl.last_buf;

    /* the leading ints of that transfer are scattered to separate fields:
     * [0] i_cost_est, [1] i_cost_est_aq, [2] i_intra_mbs (P frames only) */
    void *stat_dest[3] = { &fenc->i_cost_est[d0][d1], &fenc->i_cost_est_aq[d0][d1], NULL };
    int num_stats = 2;
    if( b == p1 ) // P frames only
        stat_dest[num_stats++] = &fenc->i_intra_mbs[d0];

    for( int k = 0; k < num_stats; k++ )
    {
        h->opencl.copies[h->opencl.num_copies].src = locked + k * sizeof(int);
        h->opencl.copies[h->opencl.num_copies].dest = stat_dest[k];
        h->opencl.copies[h->opencl.num_copies].bytes = sizeof(int);
        h->opencl.num_copies++;
    }
    return 0;
}
|
||||
|
||||
/* Prepare frames[0..num_frames] for OpenCL slicetype analysis.
 *
 * Does nothing unless h->param.b_opencl is set.  Otherwise:
 *  - on Windows, raises the priority of the calling thread and, if the queue
 *    exposes it through CL_QUEUE_THREAD_HANDLE_AMD, of the driver thread,
 *    saving the previous priorities in h->opencl;
 *  - runs x264_opencl_lowres_init() on every frame and flushes;
 *  - for trellis B-adapt with B-frames enabled, launches every motion search
 *    at distances 1 .. i_bframe-1 that is still marked as not done (0x7FFF in
 *    the first MV component), then flushes again. */
void x264_opencl_slicetype_prep( x264_t *h, x264_frame_t **frames, int num_frames, int lambda )
{
    if( !h->param.b_opencl )
        return;

#ifdef _WIN32
    /* Temporarily boost priority of this lookahead thread and the OpenCL
     * driver's thread until the end of this function. On AMD GPUs this
     * greatly reduces the latency of enqueuing kernels and getting results
     * on Windows. */
    HANDLE thread = GetCurrentThread();
    h->opencl.lookahead_thread_pri = GetThreadPriority( thread );
    SetThreadPriority( thread, THREAD_PRIORITY_ABOVE_NORMAL );

    /* on success the query overwrites 'thread' with the driver thread's handle */
    x264_opencl_function_t *ocl = h->opencl.ocl;
    if( ocl->clGetCommandQueueInfo( h->opencl.queue, CL_QUEUE_THREAD_HANDLE_AMD, sizeof(HANDLE), &thread, NULL ) == CL_SUCCESS )
    {
        h->opencl.opencl_thread_pri = GetThreadPriority( thread );
        SetThreadPriority( thread, THREAD_PRIORITY_ABOVE_NORMAL );
    }
#endif

    /* precalculate intra and I frames */
    for( int i = 0; i <= num_frames; i++ )
        x264_opencl_lowres_init( h, frames[i], lambda );
    x264_opencl_flush( h );

    if( h->param.i_bframe_adaptive != X264_B_ADAPT_TRELLIS || !h->param.i_bframe )
        return;

    /* For trellis B-Adapt, precompute exhaustive motion searches */
    for( int b = 0; b <= num_frames; b++ )
    {
        x264_frame_t *cur = frames[b];

        for( int j = 1; j < h->param.i_bframe; j++ )
        {
            const int p0 = b - j; /* list0 reference, j frames back */
            const int p1 = b + j; /* list1 reference, j frames ahead */

            if( p0 >= 0 && cur->lowres_mvs[0][j-1][0][0] == 0x7FFF )
            {
                const x264_weight_t *w = x264_weight_none;

                if( h->param.analyse.i_weighted_pred )
                {
                    x264_emms();
                    x264_weights_analyse( h, cur, frames[p0], 1 );
                    w = cur->weight[0];
                }
                /* clear the sentinel so this search is not launched again */
                cur->lowres_mvs[0][j-1][0][0] = 0;
                x264_opencl_motionsearch( h, frames, b, p0, 0, lambda, w );
            }

            if( p1 <= num_frames && cur->lowres_mvs[1][j-1][0][0] == 0x7FFF )
            {
                cur->lowres_mvs[1][j-1][0][0] = 0;
                x264_opencl_motionsearch( h, frames, b, p1, 1, lambda, NULL );
            }
        }
    }

    x264_opencl_flush( h );
}
|
||||
|
||||
|
||||
/* Undo the Windows thread-priority changes made by x264_opencl_slicetype_prep():
 * the calling thread gets h->opencl.lookahead_thread_pri back and, if the
 * queue exposes its thread through CL_QUEUE_THREAD_HANDLE_AMD, that thread
 * gets h->opencl.opencl_thread_pri back.  Compiles to an empty function when
 * _WIN32 is not defined. */
void x264_opencl_slicetype_end( x264_t *h )
{
#ifdef _WIN32
    if( !h->param.b_opencl )
        return;

    HANDLE thread = GetCurrentThread();
    SetThreadPriority( thread, h->opencl.lookahead_thread_pri );

    /* on success the query overwrites 'thread' with the driver thread's handle */
    x264_opencl_function_t *ocl = h->opencl.ocl;
    if( ocl->clGetCommandQueueInfo( h->opencl.queue, CL_QUEUE_THREAD_HANDLE_AMD, sizeof(HANDLE), &thread, NULL ) == CL_SUCCESS )
        SetThreadPriority( thread, h->opencl.opencl_thread_pri );
#endif
}
|
||||
|
||||
/* Launch the OpenCL cost estimation of frames[b] predicted from frames[p0]
 * (list0) and frames[p1] (list1), unless it is not needed.
 *
 * Returns 0 without doing anything when i_cost_est[b-p0][p1-b] is already
 * non-negative or when b == p0 == p1.  Otherwise the result slots are zeroed,
 * any motion search still carrying the 0x7FFF sentinel is launched,
 * x264_opencl_finalize_cost() is called, and 1 is returned.  The return
 * values of the helpers it calls are not checked. */
int x264_opencl_precalculate_frame_cost( x264_t *h, x264_frame_t **frames, int lambda, int p0, int p1, int b )
{
    x264_frame_t *fenc = frames[b];
    const int d0 = b - p0; /* distance to the list0 reference */
    const int d1 = p1 - b; /* distance to the list1 reference */

    if( fenc->i_cost_est[d0][d1] >= 0 || (b == p0 && b == p1) )
        return 0;

    const x264_weight_t *w = x264_weight_none;

    // avoid duplicating work
    fenc->i_cost_est[d0][d1] = 0;

    /* a search is needed only for a real reference whose MVs still carry the
     * sentinel; the short-circuit keeps d0-1 / d1-1 from being used when 0 */
    const int b_search_l0 = b != p0 && fenc->lowres_mvs[0][d0-1][0][0] == 0x7FFF;
    const int b_search_l1 = b != p1 && fenc->lowres_mvs[1][d1-1][0][0] == 0x7FFF;

    if( b_search_l0 )
    {
        /* weights are analysed only when b == p1, i.e. no list1 reference */
        if( h->param.analyse.i_weighted_pred && b == p1 )
        {
            x264_emms();
            x264_weights_analyse( h, fenc, frames[p0], 1 );
            w = fenc->weight[0];
        }
        fenc->lowres_mvs[0][d0-1][0][0] = 0;
    }
    if( b_search_l1 )
        fenc->lowres_mvs[1][d1-1][0][0] = 0;
    if( b == p1 )
        fenc->i_intra_mbs[d0] = 0;

    /* position of b between p0 and p1 in 1/256 units, rounded; 128 when p0 == p1 */
    const int dist_scale_factor = p1 != p0 ? ( (d0 << 8) + ((p1-p0) >> 1) ) / (p1-p0) : 128;

    fenc->i_cost_est[d0][d1] = 0;
    fenc->i_cost_est_aq[d0][d1] = 0;

    x264_opencl_lowres_init( h, fenc, lambda );

    if( b_search_l0 )
    {
        x264_opencl_lowres_init( h, frames[p0], lambda );
        x264_opencl_motionsearch( h, frames, b, p0, 0, lambda, w );
    }
    if( b_search_l1 )
    {
        x264_opencl_lowres_init( h, frames[p1], lambda );
        x264_opencl_motionsearch( h, frames, b, p1, 1, lambda, NULL );
    }

    x264_opencl_finalize_cost( h, lambda, frames, p0, p1, b, dist_scale_factor );
    return 1;
}
|
||||
|
||||
#endif
|
||||
44
encoder/slicetype-cl.h
Normal file
44
encoder/slicetype-cl.h
Normal file
@@ -0,0 +1,44 @@
|
||||
/*****************************************************************************
|
||||
* slicetype-cl.h: OpenCL slicetype decision code (lowres lookahead)
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2017-2025 x264 project
|
||||
*
|
||||
* Authors: Anton Mitrofanov <BugMaster@narod.ru>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
|
||||
*
|
||||
* This program is also available under a commercial proprietary license.
|
||||
* For more information, contact us at licensing@x264.com.
|
||||
*****************************************************************************/
|
||||
|
||||
#ifndef X264_ENCODER_SLICETYPE_CL_H
|
||||
#define X264_ENCODER_SLICETYPE_CL_H
|
||||
|
||||
#define x264_opencl_lowres_init x264_template(opencl_lowres_init)
|
||||
int x264_opencl_lowres_init( x264_t *h, x264_frame_t *fenc, int lambda );
|
||||
#define x264_opencl_motionsearch x264_template(opencl_motionsearch)
|
||||
int x264_opencl_motionsearch( x264_t *h, x264_frame_t **frames, int b, int ref, int b_islist1, int lambda, const x264_weight_t *w );
|
||||
#define x264_opencl_finalize_cost x264_template(opencl_finalize_cost)
|
||||
int x264_opencl_finalize_cost( x264_t *h, int lambda, x264_frame_t **frames, int p0, int p1, int b, int dist_scale_factor );
|
||||
#define x264_opencl_precalculate_frame_cost x264_template(opencl_precalculate_frame_cost)
|
||||
int x264_opencl_precalculate_frame_cost( x264_t *h, x264_frame_t **frames, int lambda, int p0, int p1, int b );
|
||||
#define x264_opencl_flush x264_template(opencl_flush)
|
||||
void x264_opencl_flush( x264_t *h );
|
||||
#define x264_opencl_slicetype_prep x264_template(opencl_slicetype_prep)
|
||||
void x264_opencl_slicetype_prep( x264_t *h, x264_frame_t **frames, int num_frames, int lambda );
|
||||
#define x264_opencl_slicetype_end x264_template(opencl_slicetype_end)
|
||||
void x264_opencl_slicetype_end( x264_t *h );
|
||||
|
||||
#endif
|
||||
2036
encoder/slicetype.c
Normal file
2036
encoder/slicetype.c
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user