3rdparty/iaxclient/lib/audio_encode.c

   1 /*
   2  * iaxclient: a cross-platform IAX softphone library
   3  *
   4  * Copyrights:
   5  * Copyright (C) 2003-2006, Horizon Wimba, Inc.
   6  * Copyright (C) 2007, Wimba, Inc.
   7  *
   8  * Contributors:
   9  * Steve Kann <stevek@stevek.com>
  10  * Michael Van Donselaar <mvand@vandonselaar.org>
  11  * Shawn Lawrence <shawn.lawrence@terracecomm.com>
  12  *
  13  * This program is free software, distributed under the terms of
  14  * the GNU Lesser (Library) General Public License.
  15  */
  16
  17 #include "audio_encode.h"
  18 #include "iaxclient_lib.h"
  19 #include "iax-client.h"
  20 #ifdef CODEC_GSM
  21 #include "codec_gsm.h"
  22 #endif
  23 #include "codec_ulaw.h"
  24 #include "codec_alaw.h"
  25
  26 #include "codec_speex.h"
  27 #include <speex/speex_preprocess.h>
  28
  29 #ifdef CODEC_ILBC
  30 #include "codec_ilbc.h"
  31 #endif
  32
  33 float iaxci_silence_threshold = AUDIO_ENCODE_SILENCE_DB;
  34
  35 static float input_level = 0.0f;
  36 static float output_level = 0.0f;
  37
  38 static SpeexPreprocessState *st = NULL;
  39 static int speex_state_size = 0;
  40 static int speex_state_rate = 0;
  41
  42 int iaxci_filters = IAXC_FILTER_AGC|IAXC_FILTER_DENOISE|IAXC_FILTER_AAGC|IAXC_FILTER_CN;
  43
  44 /* use to measure time since last audio was processed */
  45 static struct timeval timeLastInput ;
  46 static struct timeval timeLastOutput ;
  47
  48 static struct iaxc_speex_settings speex_settings =
  49 {
  50         1,    /* decode_enhance */
  51         -1,   /* float quality */
  52         -1,   /* bitrate */
  53         0,    /* vbr */
  54         0,    /* abr */
  55         3     /* complexity */
  56 };
  57
  58
  59 static float vol_to_db(float vol)
  60 {
  61         /* avoid calling log10() on zero which yields inf or
  62          * negative numbers which yield nan */
  63         if ( vol <= 0.0f )
  64                 return AUDIO_ENCODE_SILENCE_DB;
  65         else
  66                 return log10f(vol) * 20.0f;
  67 }
  68
  69 static int do_level_callback()
  70 {
  71         static struct timeval last = {0,0};
  72         struct timeval now;
  73         float input_db;
  74         float output_db;
  75
  76         now = iax_tvnow();
  77
  78         if ( last.tv_sec != 0 && iaxci_usecdiff(&now, &last) < 100000 )
  79                 return 0;
  80
  81         last = now;
  82
  83         /* if input has not been processed in the last second, set to silent */
  84         input_db = iaxci_usecdiff(&now, &timeLastInput) < 1000000 ?
  85                         vol_to_db(input_level) : AUDIO_ENCODE_SILENCE_DB;
  86
  87         /* if output has not been processed in the last second, set to silent */
  88         output_db = iaxci_usecdiff(&now, &timeLastOutput) < 1000000 ?
  89                 vol_to_db(output_level) : AUDIO_ENCODE_SILENCE_DB;
  90
  91         iaxci_do_levels_callback(input_db, output_db);
  92
  93         return 0;
  94 }
  95
  96 static void set_speex_filters()
  97 {
  98         int i;
  99
 100         if ( !st )
 101                 return;
 102
 103         i = 1; /* always make VAD decision */
 104         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_VAD, &i);
 105         i = (iaxci_filters & IAXC_FILTER_AGC) ? 1 : 0;
 106         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_AGC, &i);
 107         i = (iaxci_filters & IAXC_FILTER_DENOISE) ? 1 : 0;
 108         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_DENOISE, &i);
 109
 110         /*
 111         * We can tweak these parameters to play with VAD sensitivity.
 112         * For now, we use the default values since it seems they are a good starting point.
 113         * However, if need be, this is the code that needs to change
 114         */
 115         i = 35;
 116         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_PROB_START, &i);
 117         i = 20;
 118         speex_preprocess_ctl(st, SPEEX_PREPROCESS_SET_PROB_CONTINUE, &i);
 119 }
 120
 121 static void calculate_level(short *audio, int len, float *level)
 122 {
 123         int big_sample = 0;
 124         int i;
 125
 126         for ( i = 0; i < len; i++ )
 127         {
 128                 const int sample = abs(audio[i]);
 129                 big_sample = sample > big_sample ?
 130                         sample : big_sample;
 131         }
 132
 133         *level += ((float)big_sample / 32767.0f - *level) / 5.0f;
 134 }
 135
 136
 137 static int input_postprocess(void *audio, int len, int rate)
 138 {
 139         static float lowest_volume = 1.0f;
 140         float volume;
 141         int silent = 0;
 142
 143         if ( !st || speex_state_size != len || speex_state_rate != rate )
 144         {
 145                 if (st)
 146                         speex_preprocess_state_destroy(st);
 147                 st = speex_preprocess_state_init(len,rate);
 148                 speex_state_size = len;
 149                 speex_state_rate = rate;
 150                 set_speex_filters();
 151         }
 152
 153         calculate_level((short *)audio, len, &input_level);
 154
 155         /* only preprocess if we're interested in VAD, AGC, or DENOISE */
 156         if ( (iaxci_filters & (IAXC_FILTER_DENOISE | IAXC_FILTER_AGC)) ||
 157                         iaxci_silence_threshold > 0.0f )
 158                 silent = !speex_preprocess(st, (spx_int16_t *)audio, NULL);
 159
 160         /* Analog AGC: Bring speex AGC gain out to mixer, with lots of hysteresis */
 161         /* use a higher continuation threshold for AAGC than for VAD itself */
 162         if ( !silent &&
 163              iaxci_silence_threshold != 0.0f &&
 164              (iaxci_filters & IAXC_FILTER_AGC) &&
 165              (iaxci_filters & IAXC_FILTER_AAGC)
 166            )
 167         {
 168                 static int i = 0;
 169
 170                 i++;
 171
 172                 if ( (i & 0x3f) == 0 )
 173                 {
 174                         float loudness;
 175 #ifdef SPEEX_PREPROCESS_GET_AGC_LOUDNESS
 176                         speex_preprocess_ctl(st, SPEEX_PREPROCESS_GET_AGC_LOUDNESS, &loudness);
 177 #else
 178                         loudness = st->loudness2;
 179 #endif
 180                         if ( loudness > 8000.0f || loudness < 4000.0f )
 181                         {
 182                                 const float level = iaxc_input_level_get();
 183
 184                                 if ( loudness > 16000.0f && level > 0.5f )
 185                                 {
 186                                         /* lower quickly if we're really too hot */
 187                                         iaxc_input_level_set(level - 0.2f);
 188                                 }
 189                                 else if ( loudness > 8000.0f && level >= 0.15f )
 190                                 {
 191                                         /* lower less quickly if we're a bit too hot */
 192                                         iaxc_input_level_set(level - 0.1f);
 193                                 }
 194                                 else if ( loudness < 4000.0f && level <= 0.9f )
 195                                 {
 196                                         /* raise slowly if we're cold */
 197                                         iaxc_input_level_set(level + 0.1f);
 198                                 }
 199                         }
 200                 }
 201         }
 202
 203         /* This is ugly. Basically just don't get volume level if speex thought
 204          * we were silent. Just set it to 0 in that case */
 205         if ( iaxci_silence_threshold > 0.0f && silent )
 206                 input_level = 0.0f;
 207
 208         do_level_callback();
 209
 210         volume = vol_to_db(input_level);
 211
 212         if ( volume < lowest_volume )
 213                 lowest_volume = volume;
 214
 215         if ( iaxci_silence_threshold > 0.0f )
 216                 return silent;
 217         else
 218                 return volume < iaxci_silence_threshold;
 219 }
 220
 221 static int output_postprocess(void *audio, int len)
 222 {
 223         calculate_level((short *)audio, len, &output_level);
 224
 225         do_level_callback();
 226
 227         return 0;
 228 }
 229
 230 static struct iaxc_audio_codec *create_codec(int format)
 231 {
 232         switch (format & IAXC_AUDIO_FORMAT_MASK)
 233         {
 234 #ifdef CODEC_GSM
 235         case IAXC_FORMAT_GSM:
 236                 return codec_audio_gsm_new();
 237 #endif
 238         case IAXC_FORMAT_ULAW:
 239                 return codec_audio_ulaw_new();
 240         case IAXC_FORMAT_ALAW:
 241                 return codec_audio_alaw_new();
 242         case IAXC_FORMAT_SPEEX:
 243                 return codec_audio_speex_new(&speex_settings);
 244 #ifdef CODEC_ILBC
 245         case IAXC_FORMAT_ILBC:
 246                 return codec_audio_ilbc_new();
 247 #endif
 248         default:
 249                 /* ERROR: codec not supported */
 250                 fprintf(stderr, "ERROR: Codec not supported: %d\n", format);
 251                 return NULL;
 252         }
 253 }
 254
 255 EXPORT void iaxc_set_speex_settings(int decode_enhance, float quality,
 256                 int bitrate, int vbr, int abr, int complexity)
 257 {
 258         speex_settings.decode_enhance = decode_enhance;
 259         speex_settings.quality = quality;
 260         speex_settings.bitrate = bitrate;
 261         speex_settings.vbr = vbr;
 262         speex_settings.abr = abr;
 263         speex_settings.complexity = complexity;
 264 }
 265
 266 int audio_send_encoded_audio(struct iaxc_call *call, int callNo, void *data,
 267                 int format, int samples)
 268 {
 269         unsigned char outbuf[1024];
 270         int outsize = 1024;
 271         int silent;
 272         int insize = samples;
 273
 274         /* update last input timestamp */
 275         timeLastInput = iax_tvnow();
 276
 277         silent = input_postprocess(data, insize, 8000);
 278
 279         if(silent)
 280         {
 281                 if(!call->tx_silent)
 282                 {  /* send a Comfort Noise Frame */
 283                         call->tx_silent = 1;
 284                         if ( iaxci_filters & IAXC_FILTER_CN )
 285                                 iax_send_cng(call->session, 10, NULL, 0);
 286                 }
 287                 return 0;  /* poof! no encoding! */
 288         }
 289
 290         /* we're going to send voice now */
 291         call->tx_silent = 0;
 292
 293         /* destroy encoder if it is incorrect type */
 294         if(call->encoder && call->encoder->format != format)
 295         {
 296                 call->encoder->destroy(call->encoder);
 297                 call->encoder = NULL;
 298         }
 299
 300         /* just break early if there's no format defined: this happens for the
 301          * first couple of frames of new calls */
 302         if(format == 0) return 0;
 303
 304         /* create encoder if necessary */
 305         if(!call->encoder)
 306         {
 307                 call->encoder = create_codec(format);
 308         }
 309
 310         if(!call->encoder)
 311         {
 312                 /* ERROR: no codec */
 313                 fprintf(stderr, "ERROR: Codec could not be created: %d\n", format);
 314                 return 0;
 315         }
 316
 317         if(call->encoder->encode(call->encoder, &insize, (short *)data,
 318                                 &outsize, outbuf))
 319         {
 320                 /* ERROR: codec error */
 321                 fprintf(stderr, "ERROR: encode error: %d\n", format);
 322                 return 0;
 323         }
 324
 325         if(samples-insize == 0)
 326         {
 327                 fprintf(stderr, "ERROR encoding (no samples output (samples=%d)\n", samples);
 328                 return -1;
 329         }
 330
 331         // Send the encoded audio data back to the app if required
 332         // TODO: fix the stupid way in which the encoded audio size is returned
 333         if ( iaxc_get_audio_prefs() & IAXC_AUDIO_PREF_RECV_LOCAL_ENCODED )
 334                 iaxci_do_audio_callback(callNo, 0, IAXC_SOURCE_LOCAL, 1,
 335                                 call->encoder->format & IAXC_AUDIO_FORMAT_MASK,
 336                                 sizeof(outbuf) - outsize, outbuf);
 337
 338         if(iax_send_voice(call->session,format, outbuf,
 339                                 sizeof(outbuf) - outsize, samples-insize) == -1)
 340         {
 341                 fprintf(stderr, "Failed to send voice! %s\n", iax_errstr);
 342                 return -1;
 343         }
 344
 345         return 0;
 346 }
 347
 348 /* decode encoded audio; return the number of bytes decoded
 349  * negative indicates error */
 350 int audio_decode_audio(struct iaxc_call * call, void * out, void * data, int len,
 351                 int format, int * samples)
 352 {
 353         int insize = len;
 354         int outsize = *samples;
 355
 356         timeLastOutput = iax_tvnow();
 357
 358         if ( format == 0 )
 359         {
 360                 fprintf(stderr, "audio_decode_audio: Format is zero (should't happen)!\n");
 361                 return -1;
 362         }
 363
 364         /* destroy decoder if it is incorrect type */
 365         if ( call->decoder && call->decoder->format != format )
 366         {
 367                 call->decoder->destroy(call->decoder);
 368                 call->decoder = NULL;
 369         }
 370
 371         /* create decoder if necessary */
 372         if ( !call->decoder )
 373         {
 374                 call->decoder = create_codec(format);
 375         }
 376
 377         if ( !call->decoder )
 378         {
 379                 fprintf(stderr, "ERROR: Codec could not be created: %d\n",
 380                                 format);
 381                 return -1;
 382         }
 383
 384         if ( call->decoder->decode(call->decoder,
 385                                 &insize, (unsigned char *)data,
 386                                 &outsize, (short *)out) )
 387         {
 388                 fprintf(stderr, "ERROR: decode error: %d\n", format);
 389                 return -1;
 390         }
 391
 392         output_postprocess(out, *samples - outsize);
 393
 394         *samples = outsize;
 395         return len - insize;
 396 }
 397
 398 EXPORT int iaxc_get_filters(void)
 399 {
 400         return iaxci_filters;
 401 }
 402
 403 EXPORT void iaxc_set_filters(int filters)
 404 {
 405         iaxci_filters = filters;
 406         set_speex_filters();
 407 }
 408
 409 EXPORT void iaxc_set_silence_threshold(float thr)
 410 {
 411         iaxci_silence_threshold = thr;
 412         set_speex_filters();
 413 }
 414