@@ -19,16 +19,17 @@ import encoderPath from 'opus-recorder/dist/encoderWorker.min.js';
19
19
import { MatrixClient } from "matrix-js-sdk/src/client" ;
20
20
import CallMediaHandler from "../CallMediaHandler" ;
21
21
import { SimpleObservable } from "matrix-widget-api" ;
22
- import { clamp } from "../utils/numbers" ;
22
+ import { clamp , percentageOf , percentageWithin } from "../utils/numbers" ;
23
23
import EventEmitter from "events" ;
24
24
import { IDestroyable } from "../utils/IDestroyable" ;
25
25
import { Singleflight } from "../utils/Singleflight" ;
26
26
import { PayloadEvent , WORKLET_NAME } from "./consts" ;
27
27
import { UPDATE_EVENT } from "../stores/AsyncStore" ;
28
28
import { Playback } from "./Playback" ;
29
+ import { createAudioContext } from "./compat" ;
29
30
30
31
const CHANNELS = 1 ; // stereo isn't important
31
- const SAMPLE_RATE = 48000 ; // 48khz is what WebRTC uses. 12khz is where we lose quality.
32
+ export const SAMPLE_RATE = 48000 ; // 48khz is what WebRTC uses. 12khz is where we lose quality.
32
33
const BITRATE = 24000 ; // 24kbps is pretty high quality for our use case in opus.
33
34
const TARGET_MAX_LENGTH = 120 ; // 2 minutes in seconds. Somewhat arbitrary, though longer == larger files.
34
35
const TARGET_WARN_TIME_LEFT = 10 ; // 10 seconds, also somewhat arbitrary.
@@ -55,6 +56,7 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
55
56
private recorderStream : MediaStream ;
56
57
private recorderFFT : AnalyserNode ;
57
58
private recorderWorklet : AudioWorkletNode ;
59
+ private recorderProcessor : ScriptProcessorNode ;
58
60
private buffer = new Uint8Array ( 0 ) ; // use this.audioBuffer to access
59
61
private mxc : string ;
60
62
private recording = false ;
@@ -90,78 +92,107 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
90
92
}
91
93
92
94
private async makeRecorder ( ) {
93
- this . recorderStream = await navigator . mediaDevices . getUserMedia ( {
94
- audio : {
95
- channelCount : CHANNELS ,
96
- noiseSuppression : true , // browsers ignore constraints they can't honour
97
- deviceId : CallMediaHandler . getAudioInput ( ) ,
98
- } ,
99
- } ) ;
100
- this . recorderContext = new AudioContext ( {
101
- // latencyHint: "interactive", // we don't want a latency hint (this causes data smoothing)
102
- } ) ;
103
- this . recorderSource = this . recorderContext . createMediaStreamSource ( this . recorderStream ) ;
104
- this . recorderFFT = this . recorderContext . createAnalyser ( ) ;
105
-
106
- // Bring the FFT time domain down a bit. The default is 2048, and this must be a power
107
- // of two. We use 64 points because we happen to know down the line we need less than
108
- // that, but 32 would be too few. Large numbers are not helpful here and do not add
109
- // precision: they introduce higher precision outputs of the FFT (frequency data), but
110
- // it makes the time domain less than helpful.
111
- this . recorderFFT . fftSize = 64 ;
112
-
113
- // Set up our worklet. We use this for timing information and waveform analysis: the
114
- // web audio API prefers this be done async to avoid holding the main thread with math.
115
- const mxRecorderWorkletPath = document . body . dataset . vectorRecorderWorkletScript ;
116
- if ( ! mxRecorderWorkletPath ) {
117
- throw new Error ( "Unable to create recorder: no worklet script registered" ) ;
118
- }
119
- await this . recorderContext . audioWorklet . addModule ( mxRecorderWorkletPath ) ;
120
- this . recorderWorklet = new AudioWorkletNode ( this . recorderContext , WORKLET_NAME ) ;
121
-
122
- // Connect our inputs and outputs
123
- this . recorderSource . connect ( this . recorderFFT ) ;
124
- this . recorderSource . connect ( this . recorderWorklet ) ;
125
- this . recorderWorklet . connect ( this . recorderContext . destination ) ;
126
-
127
- // Dev note: we can't use `addEventListener` for some reason. It just doesn't work.
128
- this . recorderWorklet . port . onmessage = ( ev ) => {
129
- switch ( ev . data [ 'ev' ] ) {
130
- case PayloadEvent . Timekeep :
131
- this . processAudioUpdate ( ev . data [ 'timeSeconds' ] ) ;
132
- break ;
133
- case PayloadEvent . AmplitudeMark :
134
- // Sanity check to make sure we're adding about one sample per second
135
- if ( ev . data [ 'forSecond' ] === this . amplitudes . length ) {
136
- this . amplitudes . push ( ev . data [ 'amplitude' ] ) ;
95
+ try {
96
+ this . recorderStream = await navigator . mediaDevices . getUserMedia ( {
97
+ audio : {
98
+ channelCount : CHANNELS ,
99
+ noiseSuppression : true , // browsers ignore constraints they can't honour
100
+ deviceId : CallMediaHandler . getAudioInput ( ) ,
101
+ } ,
102
+ } ) ;
103
+ this . recorderContext = createAudioContext ( {
104
+ // latencyHint: "interactive", // we don't want a latency hint (this causes data smoothing)
105
+ } ) ;
106
+ this . recorderSource = this . recorderContext . createMediaStreamSource ( this . recorderStream ) ;
107
+ this . recorderFFT = this . recorderContext . createAnalyser ( ) ;
108
+
109
+ // Bring the FFT time domain down a bit. The default is 2048, and this must be a power
110
+ // of two. We use 64 points because we happen to know down the line we need less than
111
+ // that, but 32 would be too few. Large numbers are not helpful here and do not add
112
+ // precision: they introduce higher precision outputs of the FFT (frequency data), but
113
+ // it makes the time domain less than helpful.
114
+ this . recorderFFT . fftSize = 64 ;
115
+
116
+ // Set up our worklet. We use this for timing information and waveform analysis: the
117
+ // web audio API prefers this be done async to avoid holding the main thread with math.
118
+ const mxRecorderWorkletPath = document . body . dataset . vectorRecorderWorkletScript ;
119
+ if ( ! mxRecorderWorkletPath ) {
120
+ // noinspection ExceptionCaughtLocallyJS
121
+ throw new Error ( "Unable to create recorder: no worklet script registered" ) ;
122
+ }
123
+
124
+ // Connect our inputs and outputs
125
+ this . recorderSource . connect ( this . recorderFFT ) ;
126
+
127
+ if ( this . recorderContext . audioWorklet ) {
128
+ await this . recorderContext . audioWorklet . addModule ( mxRecorderWorkletPath ) ;
129
+ this . recorderWorklet = new AudioWorkletNode ( this . recorderContext , WORKLET_NAME ) ;
130
+ this . recorderSource . connect ( this . recorderWorklet ) ;
131
+ this . recorderWorklet . connect ( this . recorderContext . destination ) ;
132
+
133
+ // Dev note: we can't use `addEventListener` for some reason. It just doesn't work.
134
+ this . recorderWorklet . port . onmessage = ( ev ) => {
135
+ switch ( ev . data [ 'ev' ] ) {
136
+ case PayloadEvent . Timekeep :
137
+ this . processAudioUpdate ( ev . data [ 'timeSeconds' ] ) ;
138
+ break ;
139
+ case PayloadEvent . AmplitudeMark :
140
+ // Sanity check to make sure we're adding about one sample per second
141
+ if ( ev . data [ 'forSecond' ] === this . amplitudes . length ) {
142
+ this . amplitudes . push ( ev . data [ 'amplitude' ] ) ;
143
+ }
144
+ break ;
137
145
}
138
- break ;
146
+ } ;
147
+ } else {
148
+ // Safari fallback: use a processor node instead, buffered to 1024 sample-frames
149
+ // of data like the worklet is.
150
+ this . recorderProcessor = this . recorderContext . createScriptProcessor ( 1024 , CHANNELS , CHANNELS ) ;
151
+ this . recorderSource . connect ( this . recorderProcessor ) ;
152
+ this . recorderProcessor . connect ( this . recorderContext . destination ) ;
153
+ this . recorderProcessor . addEventListener ( "audioprocess" , this . onAudioProcess ) ;
139
154
}
140
- } ;
141
-
142
- this . recorder = new Recorder ( {
143
- encoderPath, // magic from webpack
144
- encoderSampleRate : SAMPLE_RATE ,
145
- encoderApplication : 2048 , // voice (default is "audio")
146
- streamPages : true , // this speeds up the encoding process by using CPU over time
147
- encoderFrameSize : 20 , // ms, arbitrary frame size we send to the encoder
148
- numberOfChannels : CHANNELS ,
149
- sourceNode : this . recorderSource ,
150
- encoderBitRate : BITRATE ,
151
-
152
- // We use low values for the following to ease CPU usage - the resulting waveform
153
- // is indistinguishable for a voice message. Note that the underlying library will
154
- // pick defaults which prefer the highest possible quality, CPU be damned.
155
- encoderComplexity : 3 , // 0-10, 10 is slow and high quality.
156
- resampleQuality : 3 , // 0-10, 10 is slow and high quality
157
- } ) ;
158
- this . recorder . ondataavailable = ( a : ArrayBuffer ) => {
159
- const buf = new Uint8Array ( a ) ;
160
- const newBuf = new Uint8Array ( this . buffer . length + buf . length ) ;
161
- newBuf . set ( this . buffer , 0 ) ;
162
- newBuf . set ( buf , this . buffer . length ) ;
163
- this . buffer = newBuf ;
164
- } ;
155
+
156
+ this . recorder = new Recorder ( {
157
+ encoderPath, // magic from webpack
158
+ encoderSampleRate : SAMPLE_RATE ,
159
+ encoderApplication : 2048 , // voice (default is "audio")
160
+ streamPages : true , // this speeds up the encoding process by using CPU over time
161
+ encoderFrameSize : 20 , // ms, arbitrary frame size we send to the encoder
162
+ numberOfChannels : CHANNELS ,
163
+ sourceNode : this . recorderSource ,
164
+ encoderBitRate : BITRATE ,
165
+
166
+ // We use low values for the following to ease CPU usage - the resulting waveform
167
+ // is indistinguishable for a voice message. Note that the underlying library will
168
+ // pick defaults which prefer the highest possible quality, CPU be damned.
169
+ encoderComplexity : 3 , // 0-10, 10 is slow and high quality.
170
+ resampleQuality : 3 , // 0-10, 10 is slow and high quality
171
+ } ) ;
172
+ this . recorder . ondataavailable = ( a : ArrayBuffer ) => {
173
+ const buf = new Uint8Array ( a ) ;
174
+ const newBuf = new Uint8Array ( this . buffer . length + buf . length ) ;
175
+ newBuf . set ( this . buffer , 0 ) ;
176
+ newBuf . set ( buf , this . buffer . length ) ;
177
+ this . buffer = newBuf ;
178
+ } ;
179
+ } catch ( e ) {
180
+ console . error ( "Error starting recording: " , e ) ;
181
+ if ( e instanceof DOMException ) { // Unhelpful DOMExceptions are common - parse them sanely
182
+ console . error ( `${ e . name } (${ e . code } ): ${ e . message } ` ) ;
183
+ }
184
+
185
+ // Clean up as best as possible
186
+ if ( this . recorderStream ) this . recorderStream . getTracks ( ) . forEach ( t => t . stop ( ) ) ;
187
+ if ( this . recorderSource ) this . recorderSource . disconnect ( ) ;
188
+ if ( this . recorder ) this . recorder . close ( ) ;
189
+ if ( this . recorderContext ) {
190
+ // noinspection ES6MissingAwait - not important that we wait
191
+ this . recorderContext . close ( ) ;
192
+ }
193
+
194
+ throw e ; // rethrow so upstream can handle it
195
+ }
165
196
}
166
197
167
198
private get audioBuffer ( ) : Uint8Array {
@@ -190,14 +221,30 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
190
221
return this . mxc ;
191
222
}
192
223
224
+ private onAudioProcess = ( ev : AudioProcessingEvent ) => {
225
+ this . processAudioUpdate ( ev . playbackTime ) ;
226
+
227
+ // Unlike the worklet, we skip the waveform (amplitude) calculations here: we
228
+ // should get that information pretty quickly from the playback info instead.
229
+ } ;
230
+
193
231
private processAudioUpdate = ( timeSeconds : number ) => {
194
232
if ( ! this . recording ) return ;
195
233
196
234
// The time domain is the input to the FFT, which means we use an array of the same
197
235
// size. The time domain is also known as the audio waveform. We're ignoring the
198
236
// output of the FFT here (frequency data) because we're not interested in it.
199
237
const data = new Float32Array ( this . recorderFFT . fftSize ) ;
200
- this . recorderFFT . getFloatTimeDomainData ( data ) ;
238
+ if ( ! this . recorderFFT . getFloatTimeDomainData ) {
239
+ // Safari compat
240
+ const data2 = new Uint8Array ( this . recorderFFT . fftSize ) ;
241
+ this . recorderFFT . getByteTimeDomainData ( data2 ) ;
242
+ for ( let i = 0 ; i < data2 . length ; i ++ ) {
243
+ data [ i ] = percentageWithin ( percentageOf ( data2 [ i ] , 0 , 256 ) , - 1 , 1 ) ;
244
+ }
245
+ } else {
246
+ this . recorderFFT . getFloatTimeDomainData ( data ) ;
247
+ }
201
248
202
249
// We can't just `Array.from()` the array because we're dealing with 32bit floats
203
250
// and the built-in function won't consider that when converting between numbers.
@@ -268,7 +315,11 @@ export class VoiceRecording extends EventEmitter implements IDestroyable {
268
315
// Disconnect the source early to start shutting down resources
269
316
await this . recorder . stop ( ) ; // stop first to flush the last frame
270
317
this . recorderSource . disconnect ( ) ;
271
- this . recorderWorklet . disconnect ( ) ;
318
+ if ( this . recorderWorklet ) this . recorderWorklet . disconnect ( ) ;
319
+ if ( this . recorderProcessor ) {
320
+ this . recorderProcessor . disconnect ( ) ;
321
+ this . recorderProcessor . removeEventListener ( "audioprocess" , this . onAudioProcess ) ;
322
+ }
272
323
273
324
// close the context after the recorder so the recorder doesn't try to
274
325
// connect anything to the context (this would generate a warning)
0 commit comments