Fix decoding for streams containing differently sampled frames - spek

commit 79ae75a7b0477d111c24de78f38940fb3c6ca4da
parent c4c134684e13e4e0de797778d4eca9213429bddf
Author: Alexander Kojevnikov <alexander@kojevnikov.com>
Date:   Thu, 28 Feb 2013 21:49:59 -0800

Fix decoding for streams containing differently sampled frames

Diffstat:
M src/spek-audio.cc  | 98 ++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
M src/spek-audio.h  | 4 +---
M src/spek-pipeline.cc  | 57 +++++++++++++--------------------------------------------

3 files changed, 71 insertions(+), 88 deletions(-)
diff --git a/src/spek-audio.cc b/src/spek-audio.cc
@@ -31,7 +31,7 @@ public:
     AudioFileImpl(
         AudioError error, AVFormatContext *format_context, int audio_stream,
         const std::string& codec_name, int bit_rate, int sample_rate, int bits_per_sample,
-        int channels, double duration, bool is_planar, int width, bool fp
+        int channels, double duration
     );
     ~AudioFileImpl() override;
     void start(int samples) override;
@@ -44,9 +44,7 @@ public:
     int get_bits_per_sample() const override { return this->bits_per_sample; }
     int get_channels() const override { return this->channels; }
     double get_duration() const override { return this->duration; }
-    int get_width() const override { return this->width; }
-    bool get_fp() const override { return this->fp; }
-    const uint8_t *get_buffer() const override { return this->buffer; }
+    const float *get_buffer() const override { return this->buffer; }
     int64_t get_frames_per_interval() const override { return this->frames_per_interval; }
     int64_t get_error_per_interval() const override { return this->error_per_interval; }
     int64_t get_error_base() const override { return this->error_base; }
@@ -61,15 +59,12 @@ private:
     int bits_per_sample;
     int channels;
     double duration;
-    bool is_planar;
-    int width;
-    bool fp;
 
     AVPacket packet;
     int offset;
     AVFrame *frame;
-    int buffer_size;
-    uint8_t *buffer;
+    int buffer_len;
+    float *buffer;
     // TODO: these guys don't belong here, move them somewhere else when revamping the pipeline
     int64_t frames_per_interval;
     int64_t error_per_interval;
@@ -163,20 +158,12 @@ std::unique_ptr<AudioFile> Audio::open(const std::string& file_name)
         error = AudioError::CANNOT_OPEN_DECODER;
     }
 
-    bool is_planar = false;
-    int width = 0;
-    bool fp = false;
     if (!error) {
-        is_planar = av_sample_fmt_is_planar(codec_context->sample_fmt);
-        width = av_get_bytes_per_sample(codec_context->sample_fmt);
         AVSampleFormat fmt = codec_context->sample_fmt;
-        if (fmt == AV_SAMPLE_FMT_S16 || fmt == AV_SAMPLE_FMT_S16P ||
-            fmt == AV_SAMPLE_FMT_S32 || fmt == AV_SAMPLE_FMT_S32P) {
-            fp = false;
-        } else if (fmt == AV_SAMPLE_FMT_FLT || fmt == AV_SAMPLE_FMT_FLTP ||
-            fmt == AV_SAMPLE_FMT_DBL || fmt == AV_SAMPLE_FMT_DBLP ) {
-            fp = true;
-        } else {
+        if (fmt != AV_SAMPLE_FMT_S16 && fmt != AV_SAMPLE_FMT_S16P &&
+            fmt != AV_SAMPLE_FMT_S32 && fmt != AV_SAMPLE_FMT_S32P &&
+            fmt != AV_SAMPLE_FMT_FLT && fmt != AV_SAMPLE_FMT_FLTP &&
+            fmt != AV_SAMPLE_FMT_DBL && fmt != AV_SAMPLE_FMT_DBLP ) {
             error = AudioError::BAD_SAMPLE_FORMAT;
         }
     }
@@ -184,26 +171,26 @@ std::unique_ptr<AudioFile> Audio::open(const std::string& file_name)
     return std::unique_ptr<AudioFile>(new AudioFileImpl(
         error, format_context, audio_stream,
         codec_name, bit_rate, sample_rate, bits_per_sample,
-        channels, duration, is_planar, width, fp
+        channels, duration
     ));
 }
 
 AudioFileImpl::AudioFileImpl(
     AudioError error, AVFormatContext *format_context, int audio_stream,
     const std::string& codec_name, int bit_rate, int sample_rate, int bits_per_sample,
-    int channels, double duration, bool is_planar, int width, bool fp
+    int channels, double duration
 ) :
     error(error), format_context(format_context), audio_stream(audio_stream),
     codec_name(codec_name), bit_rate(bit_rate),
     sample_rate(sample_rate), bits_per_sample(bits_per_sample),
-    channels(channels), duration(duration), is_planar(is_planar), width(width), fp(fp)
+    channels(channels), duration(duration)
 {
     av_init_packet(&this->packet);
     this->packet.data = nullptr;
     this->packet.size = 0;
     this->offset = 0;
     this->frame = avcodec_alloc_frame();
-    this->buffer_size = 0;
+    this->buffer_len = 0;
     this->buffer = nullptr;
     this->frames_per_interval = 0;
     this->error_per_interval = 0;
@@ -276,27 +263,56 @@ int AudioFileImpl::read()
             // We have data, return it and come back for more later.
             int samples = this->frame->nb_samples;
             int channels = this->channels;
-            int width = this->width;
-            int buffer_size = samples * channels * width;
-            if (buffer_size > this->buffer_size) {
-                this->buffer = (uint8_t*)av_realloc(this->buffer, buffer_size);
-                this->buffer_size = buffer_size;
+            int buffer_len = samples * channels;
+            if (buffer_len > this->buffer_len) {
+                this->buffer = static_cast<float*>(
+                    av_realloc(this->buffer, buffer_len * sizeof(float))
+                );
+                this->buffer_len = buffer_len;
             }
-            if (this->is_planar) {
+
+            AVSampleFormat format = static_cast<AVSampleFormat>(this->frame->format);
+            int is_planar = av_sample_fmt_is_planar(format);
+            int i = 0;
+            for (int sample = 0; sample < samples; ++sample) {
                 for (int channel = 0; channel < channels; ++channel) {
-                    uint8_t *buffer = this->buffer + channel * width;
-                    uint8_t *data = this->frame->data[channel];
-                    for (int sample = 0; sample < samples; ++sample) {
-                        for (int i = 0; i < width; ++i) {
-                            *buffer++ = *data++;
-                        }
-                        buffer += (channels - 1) * width;
+                    uint8_t *data;
+                    int offset;
+                    if (is_planar) {
+                        data = this->frame->data[channel];
+                        offset = sample;
+                    } else {
+                        data = this->frame->data[0];
+                        offset = i;
+                    }
+                    float value;
+                    switch (format) {
+                    case AV_SAMPLE_FMT_S16:
+                    case AV_SAMPLE_FMT_S16P:
+                        value = reinterpret_cast<int16_t*>(data)[offset]
+                            / static_cast<float>(INT16_MAX);
+                        break;
+                    case AV_SAMPLE_FMT_S32:
+                    case AV_SAMPLE_FMT_S32P:
+                        value = reinterpret_cast<int32_t*>(data)[offset]
+                            / static_cast<float>(INT32_MAX);
+                        break;
+                    case AV_SAMPLE_FMT_FLT:
+                    case AV_SAMPLE_FMT_FLTP:
+                        value = reinterpret_cast<float*>(data)[offset];
+                        break;
+                    case AV_SAMPLE_FMT_DBL:
+                    case AV_SAMPLE_FMT_DBLP:
+                        value = reinterpret_cast<double*>(data)[offset];
+                        break;
+                    default:
+                        value = 0.0f;
+                        break;
                     }
+                    this->buffer[i++] = value;
                 }
-            } else {
-                memcpy(this->buffer, this->frame->data[0], buffer_size);
             }
-            return buffer_size;
+            return buffer_len;
         }
         if (this->packet.data) {
             this->packet.data -= this->offset;
diff --git a/src/spek-audio.h b/src/spek-audio.h
@@ -49,9 +49,7 @@ public:
     virtual int get_bits_per_sample() const = 0;
     virtual int get_channels() const = 0;
     virtual double get_duration() const = 0;
-    virtual int get_width() const = 0;
-    virtual bool get_fp() const = 0;
-    virtual const uint8_t *get_buffer() const = 0;
+    virtual const float *get_buffer() const = 0;
     virtual int64_t get_frames_per_interval() const = 0;
     virtual int64_t get_error_per_interval() const = 0;
     virtual int64_t get_error_base() const = 0;
diff --git a/src/spek-pipeline.cc b/src/spek-pipeline.cc
@@ -83,7 +83,6 @@ struct spek_pipeline
 static void * reader_func(void *);
 static void * worker_func(void *);
 static void reader_sync(struct spek_pipeline *p, int pos);
-static float average_input(const struct spek_pipeline *p, const void *buffer);
 
 struct spek_pipeline * spek_pipeline_open(
     std::unique_ptr<AudioFile> file, int bands, int samples, spek_pipeline_cb cb, void *cb_data)
@@ -295,16 +294,20 @@ static void * reader_func(void *pp)
     }
 
     int pos = 0, prev_pos = 0;
-    int block_size = p->file->get_width() * p->file->get_channels();
-    int size;
-    while ((size = p->file->read()) > 0) {
+    int channels = p->file->get_channels();
+    int len;
+    while ((len = p->file->read()) > 0) {
         if (p->quit) break;
 
-        const uint8_t *buffer = p->file->get_buffer();
-        while (size >= block_size) {
-            p->input[pos] = average_input(p, buffer);
-            buffer += block_size;
-            size -= block_size;
+        const float *buffer = p->file->get_buffer();
+        while (len >= channels) {
+            float val = 0.0f;
+            for (int i = 0; i < channels; i++) {
+                val += buffer[i];
+            }
+            p->input[pos] = val / channels;
+            buffer += channels;
+            len -= channels;
             pos = (pos + 1) % p->input_size;
 
             // Wake up the worker if we have enough data.
@@ -312,7 +315,7 @@ static void * reader_func(void *pp)
                 reader_sync(p, prev_pos = pos);
             }
         }
-        assert(size == 0);
+        assert(len == 0);
     }
 
     if (pos != prev_pos) {
@@ -431,37 +434,3 @@ static void * worker_func(void *pp)
         }
     }
 }
-
-static float average_input(const struct spek_pipeline *p, const void *buffer)
-{
-    int channels = p->file->get_channels();
-    float res = 0.0f;
-    if (p->file->get_fp()) {
-        if (p->file->get_width() == 4) {
-            float *b = (float*)buffer;
-            for (int i = 0; i < channels; i++) {
-                res += b[i];
-            }
-        } else {
-            assert(p->file->get_width() == 8);
-            double *b = (double*)buffer;
-            for (int i = 0; i < channels; i++) {
-                res += (float) b[i];
-            }
-        }
-    } else {
-        if (p->file->get_width() == 2) {
-            int16_t *b = (int16_t*)buffer;
-            for (int i = 0; i < channels; i++) {
-                res += b[i] / (float) INT16_MAX;
-            }
-        } else {
-            assert (p->file->get_width() == 4);
-            int32_t *b = (int32_t*)buffer;
-            for (int i = 0; i < channels; i++) {
-                res += b[i] / (float) INT32_MAX;
-            }
-        }
-    }
-    return res / channels;
-}

	spek Acoustic spectrum analyser
	git clone http://git.hanabi.in/repos/spek.git
	Log | Files | Refs | README

M	src/spek-audio.cc	|	98	++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
M	src/spek-audio.h	|	4	+---
M	src/spek-pipeline.cc	|	57	+++++++++++++--------------------------------------------