Merge pull request #2152 from daschuer/analyzer_start

Fix time offset in key and beat analysis
author: Uwe Klotz <uklotz@mixxx.org> 2019-08-08 09:36:41 +0200
committer: GitHub <noreply@github.com> 2019-08-08 09:36:41 +0200
commit: 58f7a1efb5cf5e27addfb9acf444f6bf41b15392 (patch)
tree: 4796d46ad0909949eefe790702cb5f10a7434726 /src/analyzer
parent: cb10da77b05dbf2fd519774d20bea43e52912e6d (diff)
parent: a84bfc259b08726f4e05969a303828949c325e5c (diff)
6 files changed, 79 insertions, 49 deletions
diff --git a/src/analyzer/analyzerthread.cpp b/src/analyzer/analyzerthread.cpp
index af3bf6930a..c0e4cc9ca7 100644
--- a/src/analyzer/analyzerthread.cpp
+++ b/src/analyzer/analyzerthread.cpp
@@ -256,7 +256,8 @@ AnalyzerThread::AnalysisResult AnalyzerThread::analyzeAudioSource(
         }
 
         // 2nd: step: Analyze chunk of decoded audio data
-        if (readableSampleFrames.frameLength() == mixxx::kAnalysisFramesPerBlock) {
+        if (readableSampleFrames.frameLength() == mixxx::kAnalysisFramesPerBlock ||
+                remainingFrames.empty()) {
             // Complete chunk of audio samples has been read for analysis
             for (auto&& analyzer : m_analyzers) {
                 analyzer.processSamples(
@@ -267,19 +268,13 @@ AnalyzerThread::AnalysisResult AnalyzerThread::analyzeAudioSource(
                 result = AnalysisResult::Complete;
             }
         } else {
-            // Partial chunk of audio samples has been read.
-            // This should only happen at the end of an audio stream,
-            // otherwise a decoding error must have occurred.
-            if (remainingFrames.empty()) {
-                result = AnalysisResult::Complete;
-            } else {
-                // EOF not reached -> Maybe a corrupt file?
-                kLogger.warning()
-                        << "Aborting analysis after failure to read sample data:"
-                        << "expected frames =" << inputFrameIndexRange
-                        << ", actual frames =" << readableSampleFrames.frameIndexRange();
-                result = AnalysisResult::Partial;
-            }
+            // A partial chunk of audio samples has been read, but it is not the final one.
+            // A decoding error must have occurred, maybe a corrupt file?
+            kLogger.warning()
+                    << "Aborting analysis after failure to read sample data:"
+                    << "expected frames =" << inputFrameIndexRange
+                    << ", actual frames =" << readableSampleFrames.frameIndexRange();
+            result = AnalysisResult::Partial;
         }
 
         // Don't check again for paused/stopped and simply finish the
diff --git a/src/analyzer/plugins/analyzerqueenmarybeats.cpp b/src/analyzer/plugins/analyzerqueenmarybeats.cpp
index 3eca0f9b04..643261c693 100644
--- a/src/analyzer/plugins/analyzerqueenmarybeats.cpp
+++ b/src/analyzer/plugins/analyzerqueenmarybeats.cpp
@@ -53,7 +53,6 @@ bool AnalyzerQueenMaryBeats::initialize(int samplerate) {
 }
 
 bool AnalyzerQueenMaryBeats::processSamples(const CSAMPLE* pIn, const int iLen) {
-    DEBUG_ASSERT(iLen == kAnalysisSamplesPerBlock);
     DEBUG_ASSERT(iLen % kAnalysisChannels == 0);
     if (!m_pDetectionFunction) {
         return false;
@@ -63,8 +62,7 @@ bool AnalyzerQueenMaryBeats::processSamples(const CSAMPLE* pIn, const int iLen)
 }
 
 bool AnalyzerQueenMaryBeats::finalize() {
-    // TODO(rryan) if iLen is less than frame size, pad with zeros. Do we need
-    // flush support?
+    m_helper.finalize();
 
     int nonZeroCount = m_detectionResults.size();
     while (nonZeroCount > 0 && m_detectionResults.at(nonZeroCount - 1) <= 0.0) {
@@ -72,21 +70,13 @@ bool AnalyzerQueenMaryBeats::finalize() {
     }
 
     std::vector<double> df;
-    std::vector<double> beatPeriod;
+    std::vector<double> beatPeriod(nonZeroCount);
     std::vector<double> tempi;
 
     df.reserve(nonZeroCount);
-    beatPeriod.reserve(nonZeroCount);
 
-    // NOTE(rryan): The VAMP plugin skipped the first 2 detection function
-    // results so I do as well. Not sure why.
-    for (int i = 2; i < nonZeroCount; ++i) {
+    for (int i = 0; i < nonZeroCount; ++i) {
         df.push_back(m_detectionResults.at(i));
-        beatPeriod.push_back(0.0);
-    }
-
-    if (df.empty()) {
-        return false;
     }
 
     TempoTrackV2 tt(m_iSampleRate, kStepSize);
@@ -95,10 +85,33 @@ bool AnalyzerQueenMaryBeats::finalize() {
     std::vector<double> beats;
     tt.calculateBeats(df, beatPeriod, beats);
 
-    m_resultBeats.resize(beats.size());
-    double* result = (double*)&m_resultBeats.at(0);
-    for (size_t i = 0; i < beats.size(); ++i) {
-        result[i] = beats[i] * kStepSize;
+    // In some tracks a beat at 0:00 is detected when a noise floor starts.
+    // Here we check the level and the position for plausibility and remove
+    // the beat if this is the case.
+    size_t firstBeat = 0;
+    if (beats.size() >= 3) {
+        if (beats.at(0) <= 0) {
+            firstBeat = 1;
+        } else if (m_detectionResults.at(beats.at(0)) <
+                (m_detectionResults.at(beats.at(1)) +
+                m_detectionResults.at(beats.at(2))) / 4) {
+            // the beat is less than half as high as the average of the two
+            // following beats. Skip it.
+            firstBeat = 1;
+        } else {
+            int diff = (beats.at(1) - beats.at(0)) - (beats.at(2) - beats.at(1));
+            // we don't allow a significant tempo change after the first beat
+            if (diff > 2 || diff < -2) {
+                // first beat is off grid. Skip it.
+                firstBeat = 1;
+            }
+        }
+    }
+
+    m_resultBeats.reserve(beats.size());
+    for (size_t i = firstBeat; i < beats.size(); ++i) {
+        double result = (beats.at(i) * kStepSize) - kStepSize / 2;
+        m_resultBeats.push_back(result);
     }
 
     m_pDetectionFunction.reset();
diff --git a/src/analyzer/plugins/analyzerqueenmarykey.cpp b/src/analyzer/plugins/analyzerqueenmarykey.cpp
index 6cd846acfe..5933699606 100644
--- a/src/analyzer/plugins/analyzerqueenmarykey.cpp
+++ b/src/analyzer/plugins/analyzerqueenmarykey.cpp
@@ -77,9 +77,7 @@ bool AnalyzerQueenMaryKey::initialize(int samplerate) {
 }
 
 bool AnalyzerQueenMaryKey::processSamples(const CSAMPLE* pIn, const int iLen) {
-    DEBUG_ASSERT(iLen == kAnalysisSamplesPerBlock);
     DEBUG_ASSERT(iLen % kAnalysisChannels == 0);
-
     if (!m_pKeyMode) {
         return false;
     }
@@ -90,7 +88,6 @@ bool AnalyzerQueenMaryKey::processSamples(const CSAMPLE* pIn, const int iLen) {
 }
 
 bool AnalyzerQueenMaryKey::finalize() {
-    // TODO(rryan) do we need a flush?
     m_helper.finalize();
     m_pKeyMode.reset();
     return true;
diff --git a/src/analyzer/plugins/analyzersoundtouchbeats.cpp b/src/analyzer/plugins/analyzersoundtouchbeats.cpp
index cc28733e4c..13edc5d6d6 100644
--- a/src/analyzer/plugins/analyzersoundtouchbeats.cpp
+++ b/src/analyzer/plugins/analyzersoundtouchbeats.cpp
@@ -25,7 +25,6 @@ bool AnalyzerSoundTouchBeats::processSamples(const CSAMPLE* pIn, const int iLen)
     if (!m_pSoundTouch) {
         return false;
     }
-    DEBUG_ASSERT(iLen == kAnalysisSamplesPerBlock);
     DEBUG_ASSERT(iLen % kAnalysisChannels == 0);
     // We analyze a mono mixdown of the signal since we don't think stereo does
     // us any good.
diff --git a/src/analyzer/plugins/buffering_utils.cpp b/src/analyzer/plugins/buffering_utils.cpp
index b558f0cc41..a584119d0d 100644
--- a/src/analyzer/plugins/buffering_utils.cpp
+++ b/src/analyzer/plugins/buffering_utils.cpp
@@ -1,21 +1,42 @@
 #include "analyzer/plugins/buffering_utils.h"
 
 #include "util/math.h"
+#include "util/sample.h"
+
+#include <string.h>
 
 namespace mixxx {
 
 bool DownmixAndOverlapHelper::initialize(size_t windowSize, size_t stepSize, WindowReadyCallback callback) {
-    m_buffer.resize(windowSize);
+    m_buffer.assign(windowSize, 0.0);
     m_callback = callback;
     m_windowSize = windowSize;
     m_stepSize = stepSize;
-    m_bufferWritePosition = 0;
+    // make sure the first frame is centered into the fft window. This makes sure
+    // that the result is significant starting from the first step.
+    m_bufferWritePosition = windowSize / 2;
     return m_windowSize > 0 && m_stepSize > 0 &&
             m_stepSize <= m_windowSize && callback;
 }
 
 bool DownmixAndOverlapHelper::processStereoSamples(const CSAMPLE* pInput, size_t inputStereoSamples) {
     const size_t numInputFrames = inputStereoSamples / 2;
+    return processInner(pInput, numInputFrames);
+}
+
+bool DownmixAndOverlapHelper::finalize() {
+    // We need to append at least m_windowSize / 2 - m_stepSize silence
+    // to have valid analysis results for the last track samples.
+    // Since we proceed in fixed steps, up to "m_stepSize - 1" samples remain
+    // unprocessed. That is the reason why we use "m_windowSize / 2 - 1" below,
+    // instead of "m_windowSize / 2 - m_stepSize"
+    size_t framesToFillWindow = m_windowSize - m_bufferWritePosition;
+    size_t numInputFrames = math_max(framesToFillWindow, m_windowSize / 2 - 1);
+    return processInner(nullptr, numInputFrames);
+}
+
+bool DownmixAndOverlapHelper::processInner(
+        const CSAMPLE* pInput, size_t numInputFrames) {
     size_t inRead = 0;
     double* pDownmix = m_buffer.data();
 
@@ -23,12 +44,20 @@ bool DownmixAndOverlapHelper::processStereoSamples(const CSAMPLE* pInput, size_t
         size_t writeAvailable = math_min(numInputFrames,
                 m_windowSize - m_bufferWritePosition);
 
-        for (size_t i = 0; i < writeAvailable; ++i) {
-            // We analyze a mono downmix of the signal since we don't think
-            // stereo does us any good.
-            pDownmix[m_bufferWritePosition + i] = (pInput[(inRead + i) * 2] +
-                                                          pInput[(inRead + i) * 2 + 1]) *
-                    0.5;
+        if (pInput) {
+            for (size_t i = 0; i < writeAvailable; ++i) {
+                // We analyze a mono downmix of the signal since we don't think
+                // stereo does us any good.
+                pDownmix[m_bufferWritePosition + i] = (pInput[(inRead + i) * 2] +
+                                                              pInput[(inRead + i) * 2 + 1]) *
+                        0.5;
+            }
+        } else {
+            // we are in the finalize call. Add silence to
+            // complete samples left in the buffer.
+            for (size_t i = 0; i < writeAvailable; ++i) {
+                pDownmix[m_bufferWritePosition + i] = 0;
+            }
         }
         m_bufferWritePosition += writeAvailable;
         inRead += writeAvailable;
@@ -52,9 +81,4 @@ bool DownmixAndOverlapHelper::processStereoSamples(const CSAMPLE* pInput, size_t
     return true;
 }
 
-bool DownmixAndOverlapHelper::finalize() {
-    // TODO(rryan) flush support?
-    return true;
-}
-
-}
-\ No newline at end of file
+} // namespace mixxx
diff --git a/src/analyzer/plugins/buffering_utils.h b/src/analyzer/plugins/buffering_utils.h
index b950a235e4..8e4ade0d4d 100644
--- a/src/analyzer/plugins/buffering_utils.h
+++ b/src/analyzer/plugins/buffering_utils.h
@@ -28,6 +28,8 @@ class DownmixAndOverlapHelper {
     bool finalize();
 
   private:
+    bool processInner(const CSAMPLE* pInput, size_t numInputFrames);
+
     std::vector<double> m_buffer;
     // The window size in frames.
     size_t m_windowSize = 0;
author	Uwe Klotz <uklotz@mixxx.org>	2019-08-08 09:36:41 +0200
committer	GitHub <noreply@github.com>	2019-08-08 09:36:41 +0200
commit	58f7a1efb5cf5e27addfb9acf444f6bf41b15392 (patch)
tree	4796d46ad0909949eefe790702cb5f10a7434726 /src/analyzer
parent	cb10da77b05dbf2fd519774d20bea43e52912e6d (diff)
parent	a84bfc259b08726f4e05969a303828949c325e5c (diff)