3rdparty/iaxclient/lib/spandsp/plc.h

   1 /*
   2  * SpanDSP - a series of DSP components for telephony
   3  *
   4  * plc.h
   5  *
   6  * Written by Steve Underwood <steveu@coppice.org>
   7  *
   8  * Copyright (C) 2004 Steve Underwood
   9  *
  10  * All rights reserved.
  11  *
  12  * This program is free software; you can redistribute it and/or modify
  13  * it under the terms of the GNU General Public License as published by
  14  * the Free Software Foundation; either version 2 of the License, or
  15  * (at your option) any later version.
  16  *
  17  * This program is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20  * GNU General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU General Public License
  23  * along with this program; if not, write to the Free Software
  24  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  25  *
  26  * This version may be optionally licenced under the GNU LGPL licence.
  27  * This version is disclaimed to DIGIUM for inclusion in the Asterisk project.
  28  */
  29
  30 /*! \file */
  31
  32 #if !defined(_PLC_H_)
  33 #define _PLC_H_
  34
  35 #ifdef SOLARIS
  36 #include <sys/int_types.h>
  37 #else
  38 #ifndef _MSC_VER
  39 #include <inttypes.h>
  40 #else
  41 typedef short int16_t;
  42 #endif
  43 #endif
  44
  45 /*! \page plc_page Packet loss concealment
  46 \section plc_page_sec_1 What does it do?
  47 The packet loss concealment module provides a suitable synthetic fill-in signal,
  48 to minimise the audible effect of lost packets in VoIP applications. It is not
  49 tied to any particular codec, and could be used with almost any codec which does not
  50 specify its own procedure for packet loss concealment.
  51
  52 Where a codec specific concealment procedure exists, the algorithm is usually built
  53 around knowledge of the characteristics of the particular codec. It will, therefore,
  54 generally give better results for that particular codec than this generic concealer will.
  55
  56 \section plc_page_sec_2 How does it work?
  57 While good packets are being received, the plc_rx() routine keeps a record of the trailing
  58 section of the known speech signal. If a packet is missed, plc_fillin() is called to produce
  59 a synthetic replacement for the real speech signal. The average magnitude difference function
  60 (AMDF) is applied to the last known good signal, to determine its effective pitch.
  61 Based on this, the last pitch period of signal is saved. Essentially, this cycle of speech
  62 will be repeated over and over until the real speech resumes. However, several refinements
  63 are needed to obtain smooth pleasant sounding results.
  64
  65 - The two ends of the stored cycle of speech will not always fit together smoothly. This can
  66   cause roughness, or even clicks, at the joins between cycles. To soften this, the
  67   1/4 pitch period of real speech preceding the cycle to be repeated is blended with the last
  68   1/4 pitch period of the cycle to be repeated, using an overlap-add (OLA) technique (i.e.
  69   in total, the last 5/4 pitch periods of real speech are used).
  70
  71 - The start of the synthetic speech will not always fit together smoothly with the tail of
  72   real speech passed on before the erasure was identified. Ideally, we would like to modify
  73   the last 1/4 pitch period of the real speech, to blend it into the synthetic speech. However,
  74   it is too late for that. We could have delayed the real speech a little, but that would
  75   require more buffer manipulation, and hurt the efficiency of the no-lost-packets case
  76   (which we hope is the dominant case). Instead we use a degenerate form of OLA to modify
  77   the start of the synthetic data. The last 1/4 pitch period of real speech is time reversed,
  78   and OLA is used to blend it with the first 1/4 pitch period of synthetic speech. The result
  79   seems quite acceptable.
  80
  81 - As we progress into the erasure, the chances of the synthetic signal being anything like
  82   correct steadily fall. Therefore, the volume of the synthesized signal is made to decay
  83   linearly, such that after 50ms of missing audio it is reduced to silence.
  84
  85 - When real speech resumes, an extra 1/4 pitch period of synthetic speech is blended with the
  86   start of the real speech. If the erasure is small, this smooths the transition. If the erasure
  87   is long, and the synthetic signal has faded to zero, the blending softens the start up of the
  88   real signal, avoiding a kind of "click" or "pop" effect that might occur with a sudden onset.
  89
  90 \section plc_page_sec_3 How do I use it?
  91 Before audio is processed, call plc_init() to create an instance of the packet loss
  92 concealer. For each received audio packet that is acceptable (i.e. not including those being
  93 dropped for being too late) call plc_rx() to record the content of the packet. Note this may
  94 modify the packet a little after a period of packet loss, to blend real and synthetic data smoothly.
  95 When a real packet is not available in time, call plc_fillin() to create a synthetic substitute.
  96 That's it!
  97 */
  98
  99 #define SAMPLE_RATE     8000 /* samples per second */
 100
 101 /*! Minimum allowed pitch (66 Hz), held as a pitch period of 120 samples at SAMPLE_RATE */
 102 #define PLC_PITCH_MIN           120
 103 /*! Maximum allowed pitch (200 Hz), held as a pitch period of 40 samples at SAMPLE_RATE */
 104 #define PLC_PITCH_MAX           40
 105 /*! Maximum pitch OLA window (one quarter of PLC_PITCH_MIN, the longest pitch period) */
 106 #define PLC_PITCH_OVERLAP_MAX   (PLC_PITCH_MIN >> 2)
 107 /*! The length over which the AMDF function looks for similarity (20 ms) */
 108 #define CORRELATION_SPAN        160
 109 /*! History buffer length. The buffer must also be at least 1.25 times
 110     PLC_PITCH_MIN, but that is much smaller than the buffer needs to be for
 111     the pitch assessment. */
 112 #define PLC_HISTORY_LEN         (CORRELATION_SPAN + PLC_PITCH_MIN)
 113
 114 typedef struct /* Packet loss concealer context, as passed to plc_init(), plc_rx() and plc_fillin() */
 115 {
 116     /*! Number of consecutive erased (missing) samples */
 117     int missing_samples;
 118     /*! Current offset into the pitch period */
 119     int pitch_offset;
 120     /*! Pitch estimate (NOTE(review): presumably a period in samples, like PLC_PITCH_MIN/PLC_PITCH_MAX - confirm) */
 121     int pitch;
 122     /*! Buffer for one cycle of speech; holds up to PLC_PITCH_MIN samples */
 123     float pitchbuf[PLC_PITCH_MIN];
 124     /*! History buffer of PLC_HISTORY_LEN 16-bit samples */
 125     int16_t history[PLC_HISTORY_LEN];
 126     /*! Current position in the history buffer (stored as an int index, not a C pointer) */
 127     int buf_ptr;
 128 } plc_state_t;
 129
 130
 131 #ifdef __cplusplus
 132 extern "C" {
 133 #endif
 134
 135 /*! Process a block of received audio samples.
 136     \brief Process a block of received audio samples.
 137     \param s The packet loss concealer context.
 138     \param amp The audio sample buffer. It may be modified a little after a period of packet loss, to blend the synthetic and real signals.
 139     \param len The number of samples in the buffer.
 140     \return The number of samples in the buffer. */
 141 int plc_rx(plc_state_t *s, int16_t amp[], int len);
 142
 143 /*! Fill-in a block of missing audio samples.
 144     \brief Fill-in a block of missing audio samples.
 145     \param s The packet loss concealer context.
 146     \param amp The audio sample buffer (NOTE(review): presumably receives the synthetic samples - confirm against the implementation).
 147     \param len The number of samples to be synthesised.
 148     \return The number of samples synthesised. */
 149 int plc_fillin(plc_state_t *s, int16_t amp[], int len);
 150
 151 /*! Create an instance of the packet loss concealer.
 152     \brief Create an instance of the packet loss concealer.
 153     \param s The packet loss concealer context.
 154            NOTE(review): whether s may be NULL (e.g. to request allocation) cannot be
 155            determined from this header - confirm against the implementation.
 156     \return A pointer to the packet loss concealer context. */
 157 plc_state_t *plc_init(plc_state_t *s);
 158
 159 #ifdef __cplusplus
 160 }
 161 #endif
 162
 163 #endif
 164 /*- End of file ------------------------------------------------------------*/