Skip to content

Commit 7ab7496

Browse files
Audio: aec: optimize acoustic echo cancellation processing
This check-in introduces performance optimization modifications to the audio Echo Cancellation (AEC) implementation. The enhancements primarily focus on refining loop structures and memory copy operations to ensure more efficient use of cycles. Signed-off-by: shastry <malladi.sastry@intel.com>
1 parent 3681e09 commit 7ab7496

2 files changed

Lines changed: 194 additions & 83 deletions

File tree

src/audio/google/google_rtc_audio_processing.c

Lines changed: 127 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -822,23 +822,46 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
822822
/* 32float: de-interlace ref buffer, convert it to float, skip channels if > Max
823823
* 16int: linearize buffer, skip channels if > Max
824824
*/
825+
// Optimization:Reduce cycle waste by streamlining the inner loop,
826+
// converting from array indexing to pointer arithmetic,
827+
// and putting data copy verification outside the loop.
825828
buffer_offset = 0;
826-
for (int i = 0; i < cd->num_frames; i++) {
827-
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel) {
829+
int16_t *ref_end = ref + cd->num_frames * cd->num_aec_reference_channels;
830+
831+
if ((void *)ref_end >= (void *)ref_buf_end)
832+
ref_end = (void *)ref_buf_start;
833+
828834
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
829-
cd->aec_reference_buffer_ptrs[channel][i] =
830-
convert_int16_to_float(ref[channel]);
835+
float **ref_ptr = cd->aec_reference_buffer_ptrs;
836+
837+
for (int i = 0; i < cd->num_frames; ++i) {
838+
for (int channel = 0; channel < cd->num_aec_reference_channels; ++channel) {
839+
if (ref && (void *)ref >= (void *)ref_buf_start &&
840+
(void *)ref < (void *)ref_buf_end) {
841+
(*ref_ptr)[channel] = convert_int16_to_float(*ref++);
842+
} else {
843+
//ref does not point to valid int16_t data
844+
return -1;
845+
}
846+
}
847+
ref_ptr++;
848+
}
849+
831850
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
832-
cd->aec_reference_buffer[buffer_offset++] = ref[channel];
833-
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
851+
int16_t *ref_buf = cd->aec_reference_buffer;
834852

853+
while (ref != ref_end) {
854+
if (ref && (void *)ref >= (void *)ref_buf_start &&
855+
(void *)ref < (void *)ref_buf_end) {
856+
*ref_buf++ = *ref++;
857+
} else {
858+
// ref does not point to valid int16_t data
859+
return -2;
835860
}
836-
837-
ref += cd->num_aec_reference_channels;
838-
if ((void *)ref >= (void *)ref_buf_end)
839-
ref = (void *)ref_buf_start;
840861
}
841862

863+
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
864+
842865
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
843866
GoogleRtcAudioProcessingAnalyzeRender_float32(cd->state,
844867
(const float **)
@@ -855,24 +878,47 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
855878
(const void **)&src_buf_start, &src_buf_size);
856879
assert(!ret);
857880
src_buf_end = src_buf_start + src_buf_size;
858-
881+
// The second optimization eliminates the inner loop
882+
// and replaces it with pointer arithmetic for speedier access.
883+
// To reduce cycle waste, the data copy check is moved outside of the loop.
859884
buffer_offset = 0;
860-
for (int i = 0; i < cd->num_frames; i++) {
861-
for (channel = 0; channel < cd->num_capture_channels; channel++)
885+
int16_t *src_end = src + cd->num_frames * cd->config.output_fmt.channels_count;
886+
887+
if ((void *)src_end >= (void *)src_buf_end)
888+
src_end = (void *)src_buf_start;
889+
862890
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
863-
cd->process_buffer_ptrs[channel][i] = convert_int16_to_float(src[channel]);
864-
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
865-
cd->process_buffer[buffer_offset++] = src[channel];
866-
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
891+
float **proc_ptr = cd->process_buffer_ptrs;
892+
893+
// Process the data until the source pointer reaches the end
894+
// This assumes that the source buffer is continuous in memory
895+
// If the source buffer is not continuous (i.e., if it wraps around
896+
// like in a circular buffer), this code will not work correctly
897+
while (src != src_end) {
898+
// Check if src has exceeded the buffer end
899+
if ((void *)src >= (void *)src_buf_end)
900+
src = (void *)src_buf_start;
867901

868-
/* move pointer to next frame
869-
* number of incoming channels may be < cd->num_capture_channels
870-
*/
871-
src += cd->config.output_fmt.channels_count;
902+
*proc_ptr++ = convert_int16_to_float(src++);
903+
}
904+
905+
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
906+
int16_t *proc_buf = cd->process_buffer;
907+
908+
// Process the data until the source pointer reaches the end
909+
// This assumes that the source buffer is continuous in memory
910+
// If the source buffer is not continuous (i.e., if it wraps
911+
// around like in a circular buffer), this code will not work correctly
912+
while (src != src_end) {
913+
// Check if src has exceeded the buffer end
872914
if ((void *)src >= (void *)src_buf_end)
873915
src = (void *)src_buf_start;
916+
917+
*proc_buf++ = *src++;
874918
}
875919

920+
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
921+
876922
source_release_data(src_stream, num_of_bytes_to_process);
877923

878924
/* call the library, use same in/out buffers */
@@ -894,24 +940,24 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
894940

895941
/* process all channels in output stream */
896942
buffer_offset = 0;
897-
for (int i = 0; i < cd->num_frames; i++) {
898-
for (channel = 0; channel < cd->config.output_fmt.channels_count; channel++) {
899-
/* set data in processed channels, zeroize not processed */
900-
if (channel < cd->num_capture_channels)
943+
int16_t *dst_end = dst + cd->num_frames * cd->config.output_fmt.channels_count;
944+
945+
if ((void *)dst_end >= (void *)dst_buf_end)
946+
dst_end = (void *)dst_buf_start;
947+
901948
#if CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API
902-
dst[channel] = convert_float_to_int16(
903-
cd->process_buffer_ptrs[channel][i]);
949+
float **proc_ptr = cd->process_buffer_ptrs;
950+
951+
while (dst != dst_end && *proc_ptr)
952+
*dst++ = convert_float_to_int16(*proc_ptr++);
953+
904954
#else /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
905-
dst[channel] = cd->process_buffer[buffer_offset++];
906-
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
907-
else
908-
dst[channel] = 0;
909-
}
955+
int16_t *process_buffer = cd->process_buffer;
910956

911-
dst += cd->config.output_fmt.channels_count;
912-
if ((void *)dst >= (void *)dst_buf_end)
913-
dst = (void *)dst_buf_start;
914-
}
957+
while (dst != dst_end && *process_buffer)
958+
*dst++ = *process_buffer++;
959+
960+
#endif /* CONFIG_COMP_GOOGLE_RTC_USE_32_BIT_FLOAT_API */
915961

916962
sink_commit_buffer(dst_stream, num_of_bytes_to_process);
917963

@@ -928,6 +974,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
928974
int16_t *src, *dst, *ref;
929975
uint32_t num_aec_reference_frames;
930976
uint32_t num_aec_reference_bytes;
977+
int ref_channels;
978+
int aec_ref_product;
931979
int num_samples_remaining;
932980
int num_frames_remaining;
933981
int channel;
@@ -950,25 +998,32 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
950998
ref_stream = ref_streamb->data;
951999
ref = audio_stream_get_rptr(ref_stream);
9521000

1001+
// Pre-calculate the number of channels in the reference stream for efficiency
1002+
ref_channels = audio_stream_get_channels(ref_stream);
1003+
1004+
// Pre-calculate the product of the number of AEC reference channels and the AEC
1005+
// reference frame index
1006+
aec_ref_product = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
1007+
9531008
num_aec_reference_frames = input_buffers[cd->aec_reference_source].size;
9541009
num_aec_reference_bytes = audio_stream_frame_bytes(ref_stream) * num_aec_reference_frames;
9551010

956-
num_samples_remaining = num_aec_reference_frames * audio_stream_get_channels(ref_stream);
1011+
num_samples_remaining = num_aec_reference_frames * ref_channels;
9571012
while (num_samples_remaining) {
9581013
nmax = audio_stream_samples_without_wrap_s16(ref_stream, ref);
9591014
n = MIN(num_samples_remaining, nmax);
9601015
for (i = 0; i < n; i += cd->num_aec_reference_channels) {
961-
j = cd->num_aec_reference_channels * cd->aec_reference_frame_index;
1016+
j = aec_ref_product;
9621017
for (channel = 0; channel < cd->num_aec_reference_channels; ++channel)
9631018
cd->aec_reference_buffer[j++] = ref[channel];
964-
965-
ref += audio_stream_get_channels(ref_stream);
1019+
ref += ref_channels;
9661020
++cd->aec_reference_frame_index;
967-
9681021
if (cd->aec_reference_frame_index == cd->num_frames) {
9691022
GoogleRtcAudioProcessingAnalyzeRender_int16(cd->state,
970-
cd->aec_reference_buffer);
1023+
cd->aec_reference_buffer);
9711024
cd->aec_reference_frame_index = 0;
1025+
// Reset the product as the frame index is reset
1026+
aec_ref_product = 0;
9721027
}
9731028
}
9741029
num_samples_remaining -= n;
@@ -984,6 +1039,8 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9841039
src = audio_stream_get_rptr(mic_stream);
9851040
dst = audio_stream_get_wptr(out_stream);
9861041

1042+
//Move out of loop
1043+
int mic_stream_channels = audio_stream_get_channels(mic_stream);
9871044
frames = input_buffers[cd->raw_microphone_source].size;
9881045
num_frames_remaining = frames;
9891046

@@ -993,34 +1050,40 @@ static int google_rtc_audio_processing_process(struct processing_module *mod,
9931050
nmax = audio_stream_frames_without_wrap(out_stream, dst);
9941051
n = MIN(n, nmax);
9951052
for (i = 0; i < n; i++) {
996-
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
997-
cd->num_capture_channels]),
998-
cd->num_frames * cd->num_capture_channels *
999-
sizeof(cd->raw_mic_buffer[0]), src,
1000-
sizeof(int16_t) * cd->num_capture_channels);
1001-
++cd->raw_mic_buffer_frame_index;
1002-
1003-
memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
1004-
sizeof(cd->output_buffer[0]),
1005-
&(cd->output_buffer[cd->output_buffer_frame_index *
1006-
cd->num_capture_channels]),
1007-
sizeof(int16_t) * cd->num_capture_channels);
1008-
++cd->output_buffer_frame_index;
1009-
1010-
if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
1011-
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
1012-
cd->raw_mic_buffer,
1013-
cd->output_buffer);
1014-
cd->output_buffer_frame_index = 0;
1015-
cd->raw_mic_buffer_frame_index = 0;
1053+
// If we haven't filled the buffer yet, copy the data
1054+
if (cd->raw_mic_buffer_frame_index < cd->num_frames) {
1055+
memcpy_s(&(cd->raw_mic_buffer[cd->raw_mic_buffer_frame_index *
1056+
cd->num_capture_channels]),
1057+
cd->num_frames * cd->num_capture_channels *
1058+
sizeof(cd->raw_mic_buffer[0]), src,
1059+
sizeof(int16_t) * cd->num_capture_channels);
1060+
++cd->raw_mic_buffer_frame_index;
1061+
}
1062+
1063+
if (cd->output_buffer_frame_index < cd->num_frames) {
1064+
memcpy_s(dst, cd->num_frames * cd->num_capture_channels *
1065+
sizeof(cd->output_buffer[0]),
1066+
&(cd->output_buffer[cd->output_buffer_frame_index *
1067+
cd->num_capture_channels]),
1068+
sizeof(int16_t) * cd->num_capture_channels);
1069+
++cd->output_buffer_frame_index;
10161070
}
10171071

1018-
src += audio_stream_get_channels(mic_stream);
1019-
dst += audio_stream_get_channels(out_stream);
1072+
src += mic_stream_channels;
1073+
dst += mic_stream_channels;
10201074
}
10211075
num_frames_remaining -= n;
10221076
src = audio_stream_wrap(mic_stream, src);
10231077
dst = audio_stream_wrap(out_stream, dst);
1078+
1079+
// If we've filled the buffer, process the data
1080+
if (cd->raw_mic_buffer_frame_index == cd->num_frames) {
1081+
GoogleRtcAudioProcessingProcessCapture_int16(cd->state,
1082+
cd->raw_mic_buffer,
1083+
cd->output_buffer);
1084+
cd->output_buffer_frame_index = 0;
1085+
cd->raw_mic_buffer_frame_index = 0;
1086+
}
10241087
}
10251088

10261089
module_update_buffer_position(&input_buffers[cd->raw_microphone_source],

0 commit comments

Comments
 (0)