|
| 1 | +// audio_loader.cpp: MP3 decode for reference audio (minimp3, no deps, no temp files) |
| 2 | + |
| 3 | +#define MINIMP3_IMPLEMENTATION |
| 4 | +#include "third_party/minimp3.h" |
| 5 | + |
| 6 | +#include "wav.h" |
| 7 | +#include "audio.h" |
| 8 | +#include <cstdio> |
| 9 | +#include <cstring> |
| 10 | +#include <vector> |
| 11 | +#include <algorithm> |
| 12 | + |
// Returns true when `path` ends with `suffix`. ASCII letters A-Z are folded to
// lowercase on both sides before comparing; every other byte must match exactly.
// Both arguments must be non-null, NUL-terminated strings.
static bool path_ends_with_ci(const char * path, const char * suffix) {
    // Hand-rolled fold (instead of tolower) so the result never depends on the C locale.
    auto fold = [](char ch) -> char {
        if (ch >= 'A' && ch <= 'Z') return (char)(ch + 32);
        return ch;
    };

    const size_t path_len   = strlen(path);
    const size_t suffix_len = strlen(suffix);
    if (suffix_len > path_len) return false;

    // Compare the last suffix_len bytes of path against suffix.
    const char * tail = path + (path_len - suffix_len);
    size_t matched = 0;
    while (matched < suffix_len && fold(tail[matched]) == fold(suffix[matched])) {
        matched++;
    }
    return matched == suffix_len;
}
| 24 | + |
// Converts interleaved 16-bit PCM into interleaved stereo float at 48 kHz.
//
//   pcm         : source samples; num_samples values are read when channels == 1,
//                 otherwise num_samples * 2 interleaved values are read
//   num_samples : number of frames (samples per channel)
//   channels    : 1 duplicates each sample into both output channels; any other
//                 value is handled as interleaved two-channel data
//   sample_rate : source rate in Hz; any rate other than 48000 is resampled by
//                 linear interpolation between neighbouring frames
//   out         : overwritten with the interleaved L/R result
//
// Samples are scaled by 1/32768.
static void pcm_to_float_stereo_48k(
        const int16_t * pcm, size_t num_samples, int channels, unsigned int sample_rate,
        std::vector<float> * out)
{
    const float to_unit = 1.0f / 32768.0f;
    std::vector<float> & stereo = *out;
    stereo.resize(num_samples * 2);

    if (channels != 1) {
        // Already interleaved: straight scale of every value.
        const size_t total = num_samples * 2;
        for (size_t n = 0; n < total; n++) {
            stereo[n] = (float)pcm[n] * to_unit;
        }
    } else {
        // Mono: write the same value to left and right.
        for (size_t n = 0; n < num_samples; n++) {
            const float v = (float)pcm[n] * to_unit;
            stereo[2 * n]     = v;
            stereo[2 * n + 1] = v;
        }
    }

    if (sample_rate == 48000) return;

    const size_t src_frames = num_samples;
    const size_t dst_frames = (size_t)((double)src_frames * 48000.0 / (double)sample_rate);
    std::vector<float> stretched(dst_frames * 2);
    for (size_t n = 0; n < dst_frames; n++) {
        // Fractional source position for output frame n.
        const double pos  = (double)n * (double)src_frames / (double)dst_frames;
        const size_t lo   = (size_t)pos;
        const size_t hi   = std::min(lo + 1, src_frames - 1);  // clamp at the last frame
        const float  frac = (float)(pos - (double)lo);
        stretched[n * 2]     = stereo[lo * 2]     * (1.0f - frac) + stereo[hi * 2]     * frac;
        stretched[n * 2 + 1] = stereo[lo * 2 + 1] * (1.0f - frac) + stereo[hi * 2 + 1] * frac;
    }
    *out = std::move(stretched);
}
| 57 | + |
| 58 | +int mp3_load_48k_stereo(const char * path, std::vector<float> * out) { |
| 59 | + FILE * f = fopen(path, "rb"); |
| 60 | + if (!f) return -1; |
| 61 | + fseek(f, 0, SEEK_END); |
| 62 | + long sz = ftell(f); |
| 63 | + fseek(f, 0, SEEK_SET); |
| 64 | + if (sz <= 0 || sz > 200 * 1024 * 1024) { |
| 65 | + fclose(f); |
| 66 | + return -1; |
| 67 | + } |
| 68 | + std::vector<uint8_t> buf((size_t)sz); |
| 69 | + if (fread(buf.data(), 1, (size_t)sz, f) != (size_t)sz) { |
| 70 | + fclose(f); |
| 71 | + return -1; |
| 72 | + } |
| 73 | + fclose(f); |
| 74 | + |
| 75 | + mp3dec_t dec; |
| 76 | + mp3dec_init(&dec); |
| 77 | + mp3dec_frame_info_t info; |
| 78 | + std::vector<int16_t> pcm; |
| 79 | + const uint8_t * read_pos = buf.data(); |
| 80 | + int remaining = (int)buf.size(); |
| 81 | + int first_hz = 0, first_ch = 0; |
| 82 | + const size_t max_samples = (size_t)(60 * 48000 * 2); |
| 83 | + |
| 84 | + while (remaining > 0) { |
| 85 | + size_t old_size = pcm.size(); |
| 86 | + if (old_size + (size_t)MINIMP3_MAX_SAMPLES_PER_FRAME > max_samples) break; |
| 87 | + pcm.resize(old_size + (size_t)MINIMP3_MAX_SAMPLES_PER_FRAME); |
| 88 | + int frame_samples = mp3dec_decode_frame(&dec, read_pos, remaining, pcm.data() + old_size, &info); |
| 89 | + if (frame_samples <= 0) { |
| 90 | + pcm.resize(old_size); |
| 91 | + read_pos++; |
| 92 | + remaining--; |
| 93 | + continue; |
| 94 | + } |
| 95 | + if (first_hz == 0) { |
| 96 | + first_hz = info.hz; |
| 97 | + first_ch = info.channels; |
| 98 | + } |
| 99 | + pcm.resize(old_size + (size_t)(frame_samples * info.channels)); |
| 100 | + read_pos += info.frame_bytes; |
| 101 | + remaining -= info.frame_bytes; |
| 102 | + } |
| 103 | + |
| 104 | + if (pcm.empty() || first_hz == 0) return -1; |
| 105 | + size_t num_samples = pcm.size() / (size_t)first_ch; |
| 106 | + pcm_to_float_stereo_48k(pcm.data(), num_samples, first_ch, (unsigned)first_hz, out); |
| 107 | + return (int)(out->size() / 2); |
| 108 | +} |
| 109 | + |
| 110 | +int load_audio_48k_stereo(const char * path, std::vector<float> * out) { |
| 111 | + if (!path || !out) return -1; |
| 112 | + if (path_ends_with_ci(path, ".mp3")) |
| 113 | + return mp3_load_48k_stereo(path, out); |
| 114 | + if (path_ends_with_ci(path, ".wav")) |
| 115 | + return wav_load_48k_stereo(path, out); |
| 116 | + return -1; |
| 117 | +} |
0 commit comments