3rdparty/iaxclient/lib/audio_encode.c

   1 /*
   2  * iaxclient: a cross-platform IAX softphone library
   3  *
   4  * Copyrights:
   5  * Copyright (C) 2003-2006, Horizon Wimba, Inc.
   6  * Copyright (C) 2007, Wimba, Inc.
   7  *
   8  * Contributors:
   9  * Steve Kann <stevek@stevek.com>
  10  * Michael Van Donselaar <mvand@vandonselaar.org>
  11  * Shawn Lawrence <shawn.lawrence@terracecomm.com>
  12  *
  13  * This program is free software, distributed under the terms of
  14  * the GNU Lesser (Library) General Public License.
  15  */
  16
  17 #include "audio_encode.h"
  18 #include "iaxclient_lib.h"
  19 #include "iax-client.h"
  20 #ifdef CODEC_GSM
  21 #include "codec_gsm.h"
  22 #endif
  23 #include "codec_ulaw.h"
  24 #include "codec_alaw.h"
  25
  26 #include "codec_speex.h"
  27 #include <speex/speex_preprocess.h>
  28
  29 #ifdef CODEC_ILBC
  30 #include "codec_ilbc.h"
  31 #endif
  32
  33 float iaxci_silence_threshold = AUDIO_ENCODE_SILENCE_DB;
  34
  35 static float input_level = 0.0f;
  36 static float output_level = 0.0f;
  37
  38 static SpeexPreprocessState *st = NULL;
  39 static int speex_state_size = 0;
  40 static int speex_state_rate = 0;
  41
  42 int iaxci_filters = IAXC_FILTER_AGC|IAXC_FILTER_DENOISE|IAXC_FILTER_AAGC|IAXC_FILTER_CN;
  43
  44 /* use to measure time since last audio was processed */
  45 static struct timeval timeLastInput ;
  46 static struct timeval timeLastOutput ;
  47
  48 static struct iaxc_speex_settings speex_settings =
  49 {
  50         1,    /* decode_enhance */
  51         -1,   /* float quality */
  52         -1,   /* bitrate */
  53         0,    /* vbr */
  54         0,    /* abr */
  55         3     /* complexity */
  56 };
  57
  58
  59 static float vol_to_db(float vol)
  60 {
  61         /* avoid calling log10() on zero which yields inf or
  62          * negative numbers which yield nan */
  63         if ( vol <= 0.0f )
  64                 return AUDIO_ENCODE_SILENCE_DB;
  65         else
  66                 return log10f(vol) * 20.0f;
  67 }
  68
  69 static int do_level_callback()
  70 {
  71         static struct timeval last = {0,0};
  72         struct timeval now;
  73         float input_db;
  74         float output_db;
  75
  76         now = iax_tvnow();
  77
  78         if ( last.tv_sec != 0 && iaxci_usecdiff(&now, &last) < 100000 )
  79                 return 0;
  80
  81         last = now;
  82
  83         /* if input has not been processed in the last second, set to silent */
  84         input_db = iaxci_usecdiff(&now, &timeLastInput) < 1000000 ?
  85                         vol_to_db(input_level) : AUDIO_ENCODE_SILENCE_DB;
  86
  87         /* if output has not been processed in the last second, set to silent */
  88         output_db = iaxci_usecdiff(&now, &timeLastOutput) < 1000000 ?
  89                 vol_to_db(output_level) : AUDIO_ENCODE_SILENCE_DB;
  90
  91         iaxci_do_levels_callback(input_db, output_db);
  92
  93         return 0;
  94 }
  95
  96 static void set_speex_filters()
  97 {
  98         int i;
  99
 100         if ( !st )
 101                 return;
 102
 103         i = 1; /* always make VAD decision */
 104         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_VAD, &i);
 105         i = (iaxci_filters & IAXC_FILTER_AGC) ? 1 : 0;
 106         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC, &i);
 107         i = (iaxci_filters & IAXC_FILTER_DENOISE) ? 1 : 0;
 108         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DENOISE, &i);
 109
 110         /*
 111         * We can tweak these parameters to play with VAD sensitivity.
 112         * For now, we use the default values since it seems they are a good starting point.
 113         * However, if need be, this is the code that needs to change
 114         */
 115         i = 35;
 116         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_PROB_START, &i);
 117         i = 20;
 118         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_PROB_CONTINUE, &i);
 119 }
 120
 /*
  * Fold the peak magnitude of one audio frame into a running level.
  *
  * audio: frame of signed 16-bit samples (not modified)
  * len:   number of samples in the frame; a non-positive value is
  *        treated as an all-zero frame
  * level: in/out running level; moved one fifth of the way towards the
  *        frame's normalized peak (0.0 .. 1.0) on every call
  */
 static void calculate_level(const short *audio, int len, float *level)
 {
         int peak = 0;
         int i;

         for ( i = 0; i < len; i++ )
         {
                 /* audio[i] is promoted to int, so abs(-32768) is safe */
                 const int magnitude = abs(audio[i]);

                 if ( magnitude > peak )
                         peak = magnitude;
         }

         /* -32768 has a magnitude of 32768, which would normalize to
          * slightly more than 1.0 (above full scale); clamp it */
         if ( peak > 32767 )
                 peak = 32767;

         *level += ((float)peak / 32767.0f - *level) / 5.0f;
 }
 135
 136
 137 static int input_postprocess(void *audio, int len, int rate)
 138 {
 139         static float lowest_volume = 1.0f;
 140         float volume;
 141         int silent = 0;
 142
 143         if ( !st || speex_state_size != len || speex_state_rate != rate )
 144         {
 145                 if (st)
 146                         speex_preprocess_state_destroy(st);
 147                 st = speex_preprocess_state_init(len,rate);
 148                 speex_state_size = len;
 149                 speex_state_rate = rate;
 150                 set_speex_filters();
 151         }
 152
 153         calculate_level((short *)audio, len, &input_level);
 154
 155         /* only preprocess if we're interested in VAD, AGC, or DENOISE */
 156         if ( (iaxci_filters & (IAXC_FILTER_DENOISE | IAXC_FILTER_AGC)) ||
 157                         iaxci_silence_threshold > 0.0f )
 158                 silent = !speex_preprocess(st, (spx_int16_t *)audio, NULL);
 159
 160         /* Analog AGC: Bring speex AGC gain out to mixer, with lots of hysteresis */
 161         /* use a higher continuation threshold for AAGC than for VAD itself */
 162         if ( !silent &&
 163              iaxci_silence_threshold != 0.0f &&
 164              (iaxci_filters & IAXC_FILTER_AGC) &&
 165              (iaxci_filters & IAXC_FILTER_AAGC)
 166            )
 167         {
 168                 static int i = 0;
 169
 170                 i++;
 171
 172                 if ( (i & 0x3f) == 0 )
 173                 {
 174                         float loudness = st->loudness2;
 175                         // speex_preprocess_ctl(st, SPEEX_PREPROCESS_GET_AGC_LOUDNESS, &loudness);
 176                         if ( loudness > 8000.0f || loudness < 4000.0f )
 177                         {
 178                                 const float level = iaxc_input_level_get();
 179
 180                                 if ( loudness > 16000.0f && level > 0.5f )
 181                                 {
 182                                         /* lower quickly if we're really too hot */
 183                                         iaxc_input_level_set(level - 0.2f);
 184                                 }
 185                                 else if ( loudness > 8000.0f && level >= 0.15f )
 186                                 {
 187                                         /* lower less quickly if we're a bit too hot */
 188                                         iaxc_input_level_set(level - 0.1f);
 189                                 }
 190                                 else if ( loudness < 4000.0f && level <= 0.9f )
 191                                 {
 192                                         /* raise slowly if we're cold */
 193                                         iaxc_input_level_set(level + 0.1f);
 194                                 }
 195                         }
 196                 }
 197         }
 198
 199         /* This is ugly. Basically just don't get volume level if speex thought
 200          * we were silent. Just set it to 0 in that case */
 201         if ( iaxci_silence_threshold > 0.0f && silent )
 202                 input_level = 0.0f;
 203
 204         do_level_callback();
 205
 206         volume = vol_to_db(input_level);
 207
 208         if ( volume < lowest_volume )
 209                 lowest_volume = volume;
 210
 211         if ( iaxci_silence_threshold > 0.0f )
 212                 return silent;
 213         else
 214                 return volume < iaxci_silence_threshold;
 215 }
 216
 217 static int output_postprocess(void *audio, int len)
 218 {
 219         calculate_level((short *)audio, len, &output_level);
 220
 221         do_level_callback();
 222
 223         return 0;
 224 }
 225
 226 static struct iaxc_audio_codec *create_codec(int format)
 227 {
 228         switch (format & IAXC_AUDIO_FORMAT_MASK)
 229         {
 230 #ifdef CODEC_GSM
 231         case IAXC_FORMAT_GSM:
 232                 return codec_audio_gsm_new();
 233 #endif
 234         case IAXC_FORMAT_ULAW:
 235                 return codec_audio_ulaw_new();
 236         case IAXC_FORMAT_ALAW:
 237                 return codec_audio_alaw_new();
 238         case IAXC_FORMAT_SPEEX:
 239                 return codec_audio_speex_new(&speex_settings);
 240 #ifdef CODEC_ILBC
 241         case IAXC_FORMAT_ILBC:
 242                 return codec_audio_ilbc_new();
 243 #endif
 244         default:
 245                 /* ERROR: codec not supported */
 246                 fprintf(stderr, "ERROR: Codec not supported: %d\n", format);
 247                 return NULL;
 248         }
 249 }
 250
 251 EXPORT void iaxc_set_speex_settings(int decode_enhance, float quality,
 252                 int bitrate, int vbr, int abr, int complexity)
 253 {
 254         speex_settings.decode_enhance = decode_enhance;
 255         speex_settings.quality = quality;
 256         speex_settings.bitrate = bitrate;
 257         speex_settings.vbr = vbr;
 258         speex_settings.abr = abr;
 259         speex_settings.complexity = complexity;
 260 }
 261
 262 int audio_send_encoded_audio(struct iaxc_call *call, int callNo, void *data,
 263                 int format, int samples)
 264 {
 265         unsigned char outbuf[1024];
 266         int outsize = 1024;
 267         int silent;
 268         int insize = samples;
 269
 270         /* update last input timestamp */
 271         timeLastInput = iax_tvnow();
 272
 273         silent = input_postprocess(data, insize, 8000);
 274
 275         if(silent)
 276         {
 277                 if(!call->tx_silent)
 278                 {  /* send a Comfort Noise Frame */
 279                         call->tx_silent = 1;
 280                         if ( iaxci_filters & IAXC_FILTER_CN )
 281                                 iax_send_cng(call->session, 10, NULL, 0);
 282                 }
 283                 return 0;  /* poof! no encoding! */
 284         }
 285
 286         /* we're going to send voice now */
 287         call->tx_silent = 0;
 288
 289         /* destroy encoder if it is incorrect type */
 290         if(call->encoder && call->encoder->format != format)
 291         {
 292                 call->encoder->destroy(call->encoder);
 293                 call->encoder = NULL;
 294         }
 295
 296         /* just break early if there's no format defined: this happens for the
 297          * first couple of frames of new calls */
 298         if(format == 0) return 0;
 299
 300         /* create encoder if necessary */
 301         if(!call->encoder)
 302         {
 303                 call->encoder = create_codec(format);
 304         }
 305
 306         if(!call->encoder)
 307         {
 308                 /* ERROR: no codec */
 309                 fprintf(stderr, "ERROR: Codec could not be created: %d\n", format);
 310                 return 0;
 311         }
 312
 313         if(call->encoder->encode(call->encoder, &insize, (short *)data,
 314                                 &outsize, outbuf))
 315         {
 316                 /* ERROR: codec error */
 317                 fprintf(stderr, "ERROR: encode error: %d\n", format);
 318                 return 0;
 319         }
 320
 321         if(samples-insize == 0)
 322         {
 323                 fprintf(stderr, "ERROR encoding (no samples output (samples=%d)\n", samples);
 324                 return -1;
 325         }
 326
 327         // Send the encoded audio data back to the app if required
 328         // TODO: fix the stupid way in which the encoded audio size is returned
 329         if ( iaxc_get_audio_prefs() & IAXC_AUDIO_PREF_RECV_LOCAL_ENCODED )
 330                 iaxci_do_audio_callback(callNo, 0, IAXC_SOURCE_LOCAL, 1,
 331                                 call->encoder->format & IAXC_AUDIO_FORMAT_MASK,
 332                                 sizeof(outbuf) - outsize, outbuf);
 333
 334         if(iax_send_voice(call->session,format, outbuf,
 335                                 sizeof(outbuf) - outsize, samples-insize) == -1)
 336         {
 337                 fprintf(stderr, "Failed to send voice! %s\n", iax_errstr);
 338                 return -1;
 339         }
 340
 341         return 0;
 342 }
 343
 344 /* decode encoded audio; return the number of bytes decoded
 345  * negative indicates error */
 346 int audio_decode_audio(struct iaxc_call * call, void * out, void * data, int len,
 347                 int format, int * samples)
 348 {
 349         int insize = len;
 350         int outsize = *samples;
 351
 352         timeLastOutput = iax_tvnow();
 353
 354         if ( format == 0 )
 355         {
 356                 fprintf(stderr, "audio_decode_audio: Format is zero (should't happen)!\n");
 357                 return -1;
 358         }
 359
 360         /* destroy decoder if it is incorrect type */
 361         if ( call->decoder && call->decoder->format != format )
 362         {
 363                 call->decoder->destroy(call->decoder);
 364                 call->decoder = NULL;
 365         }
 366
 367         /* create decoder if necessary */
 368         if ( !call->decoder )
 369         {
 370                 call->decoder = create_codec(format);
 371         }
 372
 373         if ( !call->decoder )
 374         {
 375                 fprintf(stderr, "ERROR: Codec could not be created: %d\n",
 376                                 format);
 377                 return -1;
 378         }
 379
 380         if ( call->decoder->decode(call->decoder,
 381                                 &insize, (unsigned char *)data,
 382                                 &outsize, (short *)out) )
 383         {
 384                 fprintf(stderr, "ERROR: decode error: %d\n", format);
 385                 return -1;
 386         }
 387
 388         output_postprocess(out, *samples - outsize);
 389
 390         *samples = outsize;
 391         return len - insize;
 392 }
 393
 394 EXPORT int iaxc_get_filters(void)
 395 {
 396         return iaxci_filters;
 397 }
 398
 399 EXPORT void iaxc_set_filters(int filters)
 400 {
 401         iaxci_filters = filters;
 402         set_speex_filters();
 403 }
 404
 405 EXPORT void iaxc_set_silence_threshold(float thr)
 406 {
 407         iaxci_silence_threshold = thr;
 408         set_speex_filters();
 409 }
 410