From 61ca2725ae41bfbf0eb983f84c0474167028c9a1 Mon Sep 17 00:00:00 2001 From: "yilun.zhang" Date: Tue, 11 Mar 2025 11:32:40 +0800 Subject: [PATCH 1/2] 1, fix opus non-48kHz issue. 2, fix the case that parameter and substream have different rate --- code/include/IAMF_decoder.h | 4 +- code/include/IAMF_defines.h | 12 ++ code/src/iamf_dec/IAMF_OBU.c | 12 +- code/src/iamf_dec/IAMF_decoder.c | 166 ++++++++++++--------- code/src/iamf_dec/opus/IAMF_opus_decoder.c | 2 +- code/src/iamf_dec/vlogging_tool_sr.c | 2 +- 6 files changed, 122 insertions(+), 76 deletions(-) mode change 100644 => 100755 code/src/iamf_dec/IAMF_decoder.c diff --git a/code/include/IAMF_decoder.h b/code/include/IAMF_decoder.h index ab453de8..06503509 100755 --- a/code/include/IAMF_decoder.h +++ b/code/include/IAMF_decoder.h @@ -119,7 +119,7 @@ int IAMF_layout_sound_system_channels_count(IAMF_SoundSystem ss); * @brief Get the number of channels of binaural pattern. * @return the number of channels. */ -int IAMF_layout_binaural_channels_count(); +int IAMF_layout_binaural_channels_count(void); /** * @brief Get the codec capability of iamf. Need to free string manually. @@ -129,7 +129,7 @@ int IAMF_layout_binaural_channels_count(); * is three digits to indicate the value of the additional_profile. * @return the supported codec string. */ -char *IAMF_decoder_get_codec_capability(); +char *IAMF_decoder_get_codec_capability(void); /** * @brief Set target normalization loudness value, then loudness will be diff --git a/code/include/IAMF_defines.h b/code/include/IAMF_defines.h index c4ad47c4..5a3bb2c8 100755 --- a/code/include/IAMF_defines.h +++ b/code/include/IAMF_defines.h @@ -75,6 +75,7 @@ typedef enum IAMF_AnimationType { ANIMATION_TYPE_LINEAR, ANIMATION_TYPE_BEZIER } IAMF_AnimationType; + /** * Layout Syntax: * @@ -130,6 +131,12 @@ typedef struct IAMF_Layout { * signed int (16) anchored_loudness; * } * } + * + * if (info_type & 0b11111100 > 0) { + * leb128() info_type_size; + * unsigned int (8 x info_type_size) info_type_bytes; + * } + * * } * * */ @@ -143,9 +150,14 @@ typedef struct IAMF_LoudnessInfo { uint8_t info_type; int16_t integrated_loudness; int16_t digital_peak; + int16_t true_peak; + uint8_t num_anchor_loudness; anchor_loudness_t *anchor_loudness; + + uint64_t info_type_size; + uint8_t *info_type_bytes; } IAMF_LoudnessInfo; /** diff --git a/code/src/iamf_dec/IAMF_OBU.c b/code/src/iamf_dec/IAMF_OBU.c index c4ea112d..0e7c3f20 100755 --- a/code/src/iamf_dec/IAMF_OBU.c +++ b/code/src/iamf_dec/IAMF_OBU.c @@ -967,8 +967,10 @@ IAMF_MixPresentation *iamf_mix_presentation_new(IAMF_OBU *obu) { } if (loudness[i].info_type & ~LOUDNESS_INFO_TYPE_ALL) { - size = bs_getAleb128(&b); - bs_skipABytes(&b, size); + loudness[i].info_type_size = bs_getAleb128(&b); + loudness[i].info_type_bytes = + IAMF_MALLOC(uint8_t, loudness[i].info_type_size); + bs_read(&b, loudness[i].info_type_bytes, loudness[i].info_type_size); ia_logd("extension loudness info size %" PRIu64, size); } } @@ -1049,10 +1051,12 @@ void iamf_mix_presentation_free(IAMF_MixPresentation *obj) { } if (sub->loudness) { - for (int i = 0; i < sub->num_layouts; ++i) + for (int i = 0; i < sub->num_layouts; ++i) { IAMF_FREE(sub->loudness[i].anchor_loudness); + IAMF_FREE(sub->loudness[i].info_type_bytes); + } + free(sub->loudness); } - IAMF_FREE(sub->loudness); } free(obj->sub_mixes); diff --git a/code/src/iamf_dec/IAMF_decoder.c b/code/src/iamf_dec/IAMF_decoder.c old mode 100644 new mode 100755 index 3e2e7ba9..b2f80bc0 --- a/code/src/iamf_dec/IAMF_decoder.c +++ b/code/src/iamf_dec/IAMF_decoder.c @@ -214,15 +214,46 @@ static int iamf_layout_channels_count(IAMF_Layout *layout) { return ret; } -static void iamf_layout_reset(IAMF_Layout *layout) { - if (layout) memset(layout, 0, sizeof(IAMF_Layout)); -} - static int iamf_layout_copy(IAMF_Layout *dst, IAMF_Layout *src) { memcpy(dst, src, sizeof(IAMF_Layout)); return IAMF_OK; } +static int iamf_loudness_info_copy(IAMF_LoudnessInfo *dst, + IAMF_LoudnessInfo *src) { + if (!dst || !src) return IAMF_ERR_BAD_ARG; + + dst->info_type = src->info_type; + dst->integrated_loudness = src->integrated_loudness; + dst->digital_peak = src->digital_peak; + dst->true_peak = src->true_peak; + dst->num_anchor_loudness = src->num_anchor_loudness; + if (src->num_anchor_loudness > 0) { + dst->anchor_loudness = + IAMF_MALLOCZ(anchor_loudness_t, src->num_anchor_loudness); + + if (!dst->anchor_loudness) goto loudness_info_copy_fail; + for (int i = 0; i < src->num_anchor_loudness; i++) { + dst->anchor_loudness[i].anchor_element = + src->anchor_loudness[i].anchor_element; + dst->anchor_loudness[i].anchored_loudness = + src->anchor_loudness[i].anchored_loudness; + } + } + if (src->info_type_bytes) { + dst->info_type_bytes = IAMF_MALLOC(uint8_t, src->info_type_size); + if (!dst->info_type_bytes) goto loudness_info_copy_fail; + memcpy(dst->info_type_bytes, src->info_type_bytes, src->info_type_size); + dst->info_type_size = src->info_type_size; + } + return IAMF_OK; + +loudness_info_copy_fail: + IAMF_FREE(dst->anchor_loudness); + IAMF_FREE(dst->info_type_bytes); + return IAMF_ERR_ALLOC_FAIL; +} + static int iamf_layout_copy2(IAMF_Layout *dst, TargetLayout *src) { if (!src) return IAMF_ERR_BAD_ARG; dst->type = src->type; @@ -244,9 +275,7 @@ static void iamf_layout_dump(IAMF_Layout *layout) { static void iamf_layout_info_free(LayoutInfo *layout) { if (layout) { - if (layout->sp.sp_layout.predefined_sp) - free(layout->sp.sp_layout.predefined_sp); - iamf_layout_reset(&layout->layout); + IAMF_FREE(layout->sp.sp_layout.predefined_sp); free(layout); } } @@ -673,8 +702,9 @@ static int iamf_codec_conf_get_sampling_rate(IAMF_CodecConf *c) { if (c->decoder_conf_size < 6) return IAMF_ERR_BAD_ARG; return reads32be(c->decoder_conf, 2); } else if (cid == IAMF_CODEC_OPUS) { - if (c->decoder_conf_size < 8) return IAMF_ERR_BAD_ARG; - return reads32be(c->decoder_conf, 4); + // https://aomediacodec.github.io/iamf/v1.1.0.html#opus-specific + // The sample rate used for computing offsets SHALL be 48 kHz. + return 48000; } else if (cid == IAMF_CODEC_AAC) { BitStream b; int ret, type; @@ -890,21 +920,23 @@ static MixGainUnit *iamf_database_parameter_get_mix_gain_unit( uint64_t start = 0; float ratio = 1.f; int use_default = 0; + uint64_t tsf_parameter_timestamp = 0; pi = iamf_database_parameter_viewer_get_item(&db->pViewer, pid); if (!pi) return 0; - ia_logd("pts %" PRIu64 ", duration %d, parameter id %" PRIu64 ", pts %" PRIu64 - ", duration %" PRId64, - pt, duration, pid, pi->timestamp, pi->duration); - if (pt < pi->timestamp) { + tsf_parameter_timestamp = time_transform(pi->timestamp, pi->rate, rate); + ia_logd("pts %" PRIu64 ", duration %d, rate %d, parameter id %" PRIu64 + ", pts %" PRIu64 ", duration %" PRId64 ", rate %d", + pt, duration, rate, pid, pi->timestamp, pi->duration, pi->rate); + if (pt < tsf_parameter_timestamp) { ia_logw("Don't receive mix gain parameter %" PRIu64 " at %" PRIu64 ", current parameter at " "%" PRIu64, - pi->id, pt, pi->timestamp); + pi->id, pt, tsf_parameter_timestamp); use_default = 1; } else - start = pt - pi->timestamp; + start = pt - tsf_parameter_timestamp; mgu = IAMF_MALLOCZ(MixGainUnit, 1); if (!mgu) return 0; @@ -1401,7 +1433,6 @@ static int iamf_database_add_object(IAMF_DataBase *db, IAMF_Object *obj) { if (version->primary_profile > db->profile) { ia_loge("Unimplemented profile %u", version->primary_profile); - free(obj); ret = IAMF_ERR_UNIMPLEMENTED; break; } @@ -1676,7 +1707,7 @@ static IAMF_StreamDecoder *iamf_presentation_take_decoder( IAMF_Presentation *pst, IAMF_Stream *stream) { IAMF_StreamDecoder *decoder = 0; for (int i = 0; i < pst->nb_streams; ++i) { - if (pst->decoders[i]->stream == stream) { + if (pst->decoders[i] && pst->decoders[i]->stream == stream) { decoder = pst->decoders[i]; pst->decoders[i] = 0; break; @@ -1690,7 +1721,7 @@ static IAMF_StreamRenderer *iamf_presentation_take_renderer( IAMF_Presentation *pst, IAMF_Stream *stream) { IAMF_StreamRenderer *renderer = 0; for (int i = 0; i < pst->nb_streams; ++i) { - if (pst->renderers[i]->stream == stream) { + if (pst->renderers[i] && pst->renderers[i]->stream == stream) { renderer = pst->renderers[i]; pst->renderers[i] = 0; break; @@ -2258,7 +2289,7 @@ static uint32_t iamf_set_stream_info(IAMF_DecoderHandle handle) { decoder = pst->decoders[i]; stream = pst->streams[i]; if (ctx->info.max_frame_size > stream->max_frame_size) { - for (int n = 0; i < DEC_BUF_CNT; ++n) { + for (int n = 0; n < DEC_BUF_CNT; ++n) { float *buffer = IAMF_REALLOC(float, decoder->buffers[n], ctx->info.max_frame_size * stream->nb_channels); @@ -2406,11 +2437,15 @@ static int iamf_stream_decoder_update_parameter(IAMF_StreamDecoder *dec, if (pi->type == IAMF_PARAMETER_TYPE_DEMIXING) { ChannelLayerContext *ctx = (ChannelLayerContext *)s->priv; ctx->dmx_mode = iamf_database_parameter_get_demix_mode( - db, pid, s->timestamp + dec->frame_size / 2); + db, pid, + time_transform(s->timestamp + dec->frame_size / 2, s->sampling_rate, + pi->rate)); ia_logd("update demix mode %d", ctx->dmx_mode); } else if (pi->type == IAMF_PARAMETER_TYPE_RECON_GAIN) { ReconGainList *recon = iamf_database_parameter_get_recon_gain_list( - db, pid, s->timestamp + dec->frame_size / 2); + db, pid, + time_transform(s->timestamp + dec->frame_size / 2, s->sampling_rate, + pi->rate)); ia_logd("update recon %p", recon); if (recon) iamf_stream_scale_decoder_update_recon_gain(dec, recon); } @@ -2977,6 +3012,10 @@ static int iamf_stream_render(IAMF_StreamRenderer *sr, float *in, float *out, return IAMF_OK; } +static void iamf_mixer_clear(IAMF_Mixer *m) { + memset(m->frames, 0, m->nb_elements); +} + void iamf_mixer_reset(IAMF_Mixer *m) { IAMF_FREE(m->element_ids); IAMF_FREE(m->frames); @@ -3300,15 +3339,6 @@ int iamf_decoder_internal_deliver(IAMF_DecoderHandle handle, IAMF_Frame *obj) { if (obj->trim_start > 0) ia_logd("trimming start %" PRIu64 " ", obj->trim_start); if (obj->trim_end > 0) ia_logd("trimming end %" PRIu64, obj->trim_end); - -#if 0 - if (decoder->packet.sub_packets[idx]) { - /* when the frame of stream has been overwrite, the timestamp of stream - * should be elapsed and the global time should be updated together. */ - - stream->timestamp += decoder->frame_size; - } -#endif } iamf_stream_decoder_receive_packet(decoder, idx, obj); } @@ -3446,6 +3476,7 @@ static int iamf_decoder_timestamp_sync(IAMF_DecoderHandle handle, Viewer *pv = &db->pViewer; ParameterItem *pi; uint64_t ref_timestamp = timestamp; + uint64_t tsf_timestamp = 0; if (!timestamp) return IAMF_OK; @@ -3454,18 +3485,14 @@ static int iamf_decoder_timestamp_sync(IAMF_DecoderHandle handle, for (int i = 0; i < pv->count; ++i) { pi = (ParameterItem *)pv->items[i]; if (!pi) continue; + tsf_timestamp = time_transform(timestamp, rate, pi->rate); if (!pi->timestamp) { - if (pi->rate != rate) { - ia_logw("Difference rate between parameter (%" PRIu64 - ") and reference ( %d vs %d )", - pi->id, pi->rate, rate); - continue; - } - pi->timestamp = timestamp; + pi->timestamp = tsf_timestamp; ia_logi("Parameter (%" PRIu64 ") timestamp sync to %" PRIu64, pi->id, timestamp); if (ref_timestamp != timestamp) { - iamf_parameter_item_fill_segment(pi, ref_timestamp - timestamp); + iamf_parameter_item_fill_segment( + pi, time_transform(ref_timestamp - timestamp, rate, pi->rate)); } } else ia_logi("Parameter (%" PRIu64 ") timestamp %" PRIu64, pi->id, @@ -3544,7 +3571,8 @@ static int iamf_decoder_enable_mix_presentation(IAMF_DecoderHandle handle, iamf_parameter_item_enable(pi, 1); else iamf_database_parameter_add_item(db, elem->parameters[k], - elem->element_id, rate); + elem->element_id, + elem->parameters[k]->rate); } pid = sub->conf_s[i].element_mix_gain.base.id; @@ -3553,8 +3581,8 @@ static int iamf_decoder_enable_mix_presentation(IAMF_DecoderHandle handle, if (pi) iamf_parameter_item_enable(pi, 1); else if (iamf_database_parameter_add_item( - db, &sub->conf_s[i].element_mix_gain.base, INVALID_ID, rate) == - IAMF_OK) { + db, &sub->conf_s[i].element_mix_gain.base, INVALID_ID, + sub->conf_s[i].element_mix_gain.base.rate) == IAMF_OK) { float gain_db; pi = iamf_database_parameter_get_item(db, pid); gain_db = q_to_float(sub->conf_s[i].element_mix_gain.mix_gain, 8); @@ -3581,9 +3609,9 @@ static int iamf_decoder_enable_mix_presentation(IAMF_DecoderHandle handle, pi = iamf_database_parameter_get_item(db, pid); if (pi) iamf_parameter_item_enable(pi, 1); - else if (iamf_database_parameter_add_item(db, &sub->output_mix_gain.base, - INVALID_ID, - ctx->sampling_rate) == IAMF_OK) { + else if (iamf_database_parameter_add_item( + db, &sub->output_mix_gain.base, INVALID_ID, + sub->output_mix_gain.base.rate) == IAMF_OK) { float gain_db; pi = iamf_database_parameter_get_item(db, pid); gain_db = q_to_float(sub->output_mix_gain.mix_gain, 8); @@ -4060,9 +4088,9 @@ static int iamf_extra_data_init(IAMF_DecoderHandle handle) { for (int i = 0; i < metadata->num_loudness_layouts; ++i) { iamf_layout_copy2(&metadata->loudness_layout[i], ctx->presentation->obj->sub_mixes->layouts[i]); + iamf_loudness_info_copy(&metadata->loudness[i], + &pst->obj->sub_mixes->loudness[i]); } - memcpy(metadata->loudness, pst->obj->sub_mixes->loudness, - sizeof(IAMF_LoudnessInfo) * metadata->num_loudness_layouts); if (pst) { ElementItem *ei; @@ -4086,9 +4114,7 @@ static int iamf_extra_data_init(IAMF_DecoderHandle handle) { } static int iamf_extra_data_copy(IAMF_extradata *dst, IAMF_extradata *src) { - if (!src) return IAMF_ERR_BAD_ARG; - - if (!dst) return IAMF_ERR_INTERNAL; + if (!src || !dst) return IAMF_ERR_BAD_ARG; dst->output_sound_system = src->output_sound_system; dst->number_of_samples = src->number_of_samples; @@ -4096,45 +4122,49 @@ static int iamf_extra_data_copy(IAMF_extradata *dst, IAMF_extradata *src) { dst->sampling_rate = src->sampling_rate; dst->num_loudness_layouts = src->num_loudness_layouts; dst->output_sound_mode = src->output_sound_mode; + dst->loudness_layout = 0; + dst->loudness = 0; + dst->param = 0; if (dst->num_loudness_layouts) { dst->loudness_layout = IAMF_MALLOCZ(IAMF_Layout, dst->num_loudness_layouts); dst->loudness = IAMF_MALLOCZ(IAMF_LoudnessInfo, dst->num_loudness_layouts); - if (!dst->loudness_layout || !dst->loudness) return IAMF_ERR_ALLOC_FAIL; + if (!dst->loudness_layout || !dst->loudness) goto alloc_fail; for (int i = 0; i < dst->num_loudness_layouts; ++i) { iamf_layout_copy(&dst->loudness_layout[i], &src->loudness_layout[i]); - memcpy(&dst->loudness[i], &src->loudness[i], sizeof(IAMF_LoudnessInfo)); + iamf_loudness_info_copy(&dst->loudness[i], &src->loudness[i]); } - } else { - dst->loudness_layout = 0; - dst->loudness = 0; } dst->num_parameters = src->num_parameters; if (dst->num_parameters) { dst->param = IAMF_MALLOCZ(IAMF_Param, dst->num_parameters); - if (!dst->param) return IAMF_ERR_ALLOC_FAIL; + if (!dst->param) goto alloc_fail; for (int i = 0; i < src->num_parameters; ++i) memcpy(&dst->param[i], &src->param[i], sizeof(IAMF_Param)); - } else { - dst->param = 0; } return IAMF_OK; + +alloc_fail: + IAMF_FREE(dst->loudness_layout); + IAMF_FREE(dst->loudness); + IAMF_FREE(dst->param); + return IAMF_ERR_ALLOC_FAIL; } void iamf_extra_data_reset(IAMF_extradata *data) { if (data) { - if (data->loudness_layout) { - for (int i = 0; i < data->num_loudness_layouts; ++i) - iamf_layout_reset(&data->loudness_layout[i]); - - free(data->loudness_layout); + IAMF_FREE(data->loudness_layout); + for (int i = 0; i < data->num_loudness_layouts; ++i) { + if (data->loudness) { + IAMF_FREE(data->loudness[i].anchor_loudness); + IAMF_FREE(data->loudness[i].info_type_bytes); + } } - - if (data->loudness) free(data->loudness); + IAMF_FREE(data->loudness); if (data->param) free(data->param); memset(data, 0, sizeof(IAMF_extradata)); @@ -4418,24 +4448,24 @@ char *IAMF_decoder_get_codec_capability() { if (!ccs_str) return 0; - snprintf(cc_str, STRING_SIZE, "iamf.%.03u.%.03u.ipcm", IAMF_PROFILE_DEFAULT, + snprintf(cc_str, STRING_SIZE, "iamf.%.03d.%.03d.ipcm", IAMF_PROFILE_DEFAULT, IAMF_PROFILE_DEFAULT); strcat(ccs_str, cc_str); #ifdef CONFIG_OPUS_CODEC - snprintf(cc_str, STRING_SIZE, ";iamf.%.03u.%.03u.Opus", IAMF_PROFILE_DEFAULT, + snprintf(cc_str, STRING_SIZE, ";iamf.%.03d.%.03d.Opus", IAMF_PROFILE_DEFAULT, IAMF_PROFILE_DEFAULT); strcat(ccs_str, cc_str); #endif #ifdef CONFIG_AAC_CODEC - snprintf(cc_str, STRING_SIZE, ";iamf.%.03u.%.03u.mp4a.40.2", + snprintf(cc_str, STRING_SIZE, ";iamf.%.03d.%.03d.mp4a.40.2", IAMF_PROFILE_DEFAULT, IAMF_PROFILE_DEFAULT); strcat(ccs_str, cc_str); #endif #ifdef CONFIG_FLAC_CODEC - snprintf(cc_str, STRING_SIZE, ";iamf.%.03u.%.03u.fLaC", IAMF_PROFILE_DEFAULT, + snprintf(cc_str, STRING_SIZE, ";iamf.%.03d.%.03d.fLaC", IAMF_PROFILE_DEFAULT, IAMF_PROFILE_DEFAULT); strcat(ccs_str, cc_str); #endif diff --git a/code/src/iamf_dec/opus/IAMF_opus_decoder.c b/code/src/iamf_dec/opus/IAMF_opus_decoder.c index 5b6e9a9b..b7e1221c 100755 --- a/code/src/iamf_dec/opus/IAMF_opus_decoder.c +++ b/code/src/iamf_dec/opus/IAMF_opus_decoder.c @@ -80,7 +80,7 @@ static int iamf_opus_init(IAMF_CodecContext *ths) { return IAMF_ERR_BAD_ARG; } - ths->sample_rate = readu32be(config, 4); + ths->sample_rate = 48000; // readu32be(config, 4); ths->channel_mapping_family = config[10]; ctx->dec = opus_multistream2_decoder_create(ths->sample_rate, ths->streams, diff --git a/code/src/iamf_dec/vlogging_tool_sr.c b/code/src/iamf_dec/vlogging_tool_sr.c index d8cb27dc..8761a395 100755 --- a/code/src/iamf_dec/vlogging_tool_sr.c +++ b/code/src/iamf_dec/vlogging_tool_sr.c @@ -442,7 +442,7 @@ static void write_codec_config_log(uint64_t idx, void* obu, char* log) { log += write_yaml_form(log, 6, "total_samples_in_stream: %llu", total_samples_in_stream); - char hex_string[20] = { + char hex_string[34] = { 0, }; int pos = 0; From 3532e6940b21cd9387cccf3715bf2c4a023d113b Mon Sep 17 00:00:00 2001 From: "yilun.zhang" Date: Tue, 11 Mar 2025 11:39:07 +0800 Subject: [PATCH 2/2] expand the max traks --- code/test/tools/iamfdec/include/mp4demux.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/test/tools/iamfdec/include/mp4demux.h b/code/test/tools/iamfdec/include/mp4demux.h index 1b6f44e4..c4761e8b 100755 --- a/code/test/tools/iamfdec/include/mp4demux.h +++ b/code/test/tools/iamfdec/include/mp4demux.h @@ -125,7 +125,7 @@ typedef struct { int num_a_trak; int sel_a_trak; int cur_r_trak; - int trak_type[8]; + int trak_type[32]; // video_wtr_t v_trak; audio_rtr_t *a_trak; struct {