src/sound/AudioTimeStretcher.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221

/* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */

/*
    Rosegarden
    A MIDI and audio sequencer and musical notation editor.

    This program is Copyright 2000-2008
        Guillaume Laurent   <[email protected]>,
        Chris Cannam        <[email protected]>,
        Richard Bown        <[email protected]>

    The moral rights of Guillaume Laurent, Chris Cannam, and Richard
    Bown to claim authorship of this work have been asserted.

    Other copyrights also apply to some parts of this work.  Please
    see the AUTHORS file and individual file headers for details.

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/

/*
    This file is derived from

    Sonic Visualiser
    An audio file viewer and annotation editor.
    Centre for Digital Music, Queen Mary, University of London.
    This file copyright 2006 Chris Cannam and TQMUL.
    
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of the
    License, or (at your option) any later version.  See the file
    COPYING included with this distribution for more information.
*/

#ifndef _AUDIO_TIME_STRETCHER_H_
#define _AUDIO_TIME_STRETCHER_H_

#include "SampleWindow.h"
#include "RingBuffer.h"

#include <fftw3.h>
#include <pthread.h>
#include <list>

namespace Rosegarden
{

/**
 * A time stretcher that alters the performance speed of audio,
 * preserving pitch.
 *
 * This is based on the straightforward phase vocoder with phase
 * unwrapping (as in e.g. the DAFX book pp275-), with optional
 * percussive transient detection to avoid smearing percussive notes
 * and resynchronise phases, and adding a stream API for real-time
 * use.  Principles and methods from Chris Duxbury, AES 2002 and 2004
 * thesis; Emmanuel Ravelli, DAFX 2005; Dan Barry, ISSC 2005 on
 * percussion detection; code by Chris Cannam.
 */

class AudioTimeStretcher
{
public:
    AudioTimeStretcher(size_t sampleRate,
                       size_t channels,
                       float ratio,
                       bool sharpen,
                       size_t maxOutputBlockSize);
    virtual ~AudioTimeStretcher();

    /**
     * Return the number of samples that would need to be added via
     * putInput in order to provoke the time stretcher into doing some
     * time stretching and making more output samples available.
     * This will be an estimate, if transient sharpening is on; the 
     * caller may need to do the put/get/test cycle more than once.
     */
    size_t getRequiredInputSamples() const;

    /**
     * Put (and possibly process) a given number of input samples.
     * Number should usually equal the value returned from
     * getRequiredInputSamples().
     */
    void putInput(float **input, size_t samples);

    /**
     * Get the number of processed samples ready for reading.
     */
    size_t getAvailableOutputSamples() const;

    /**
     * Get some processed samples.
     */
    void getOutput(float **output, size_t samples);

    //!!! and reset?

    /**
     * Change the time stretch ratio.
     */
    void setRatio(float ratio);

    /**
     * Get the hop size for input.
     */
    size_t getInputIncrement() const { return m_n1; }

    /**
     * Get the hop size for output.
     */
    size_t getOutputIncrement() const { return m_n2; }

    /**
     * Get the window size for FFT processing.
     */
    size_t getWindowSize() const { return m_wlen; }

    /**
     * Get the stretch ratio.
     */
    float getRatio() const { return float(m_n2) / float(m_n1); }

    /**
     * Return whether this time stretcher will attempt to sharpen transients.
     */
    bool getSharpening() const { return m_sharpen; }

    /**
     * Return the number of channels for this time stretcher.
     */
    size_t getChannelCount() const { return m_channels; }

    /**
     * Get the latency added by the time stretcher, in sample frames.
     * This will be exact if transient sharpening is off, or approximate
     * if it is on.
     */
    size_t getProcessingLatency() const;

protected:
    /**
     * Process a single phase vocoder frame from "in" into
     * m_freq[channel].
     */
    void analyseBlock(size_t channel, float *in); // into m_freq[channel]

    /**
     * Examine m_freq[0..m_channels-1] and return whether a percussive
     * transient is found.
     */
    bool isTransient(); 

    /**
     * Resynthesise from m_freq[channel] adding in to "out",
     * adjusting phases on the basis of a prior step size of lastStep.
     * Also add the window tqshape in to the modulation array (if
     * present) -- for use in ensuring the output has the correct
     * magnitude afterwards.
     */
    void synthesiseBlock(size_t channel, float *out, float *modulation,
                         size_t lastStep);

    void initialise();
    void calculateParameters();
    void cleanup();

    bool shouldSharpen() {
        return m_sharpen && (m_ratio > 0.25);
    }

    size_t m_sampleRate;
    size_t m_channels;
    size_t m_maxOutputBlockSize;
    float m_ratio;
    bool m_sharpen;
    size_t m_n1;
    size_t m_n2;
    size_t m_wlen;
    SampleWindow<float> *m_analysisWindow;
    SampleWindow<float> *m_synthesisWindow;

    int m_totalCount;
    int m_transientCount;

    int m_n2sum;
    int m_n2total;
    std::list<int> m_n2list;
    int m_adjustCount;

    float **m_prevPhase;
    float **m_prevAdjustedPhase;

    float *m_prevTransientMag;
    int  m_prevTransientScore;
    int  m_transientThreshold;
    bool m_prevTransient;

    float *m_tempbuf;
    float **m_time;
    fftwf_complex **m_freq;
    fftwf_plan *m_plan;
    fftwf_plan *m_iplan;
    
    RingBuffer<float> **m_inbuf;
    RingBuffer<float> **m_outbuf;
    float **m_mashbuf;
    float *m_modulationbuf;

    mutable pthread_mutex_t m_mutex;
};

}


#endif