3rdparty/iaxclient/lib/codec_theora.c

   1 /*
   2  * iaxclient: a cross-platform IAX softphone library
   3  *
   4  * Copyrights:
   5  * Copyright (C) 2003-2006, Horizon Wimba, Inc.
   6  * Copyright (C) 2007, Wimba, Inc.
   7  *
   8  * Contributors:
   9  * Steve Kann <stevek@stevek.com>
  10  * Mihai Balea <mihai at hates dot ms>
  11  *
  12  * This program is free software, distributed under the terms of
  13  * the GNU Lesser (Library) General Public License.
  14  */
  15
  16 /*
  17  * Some comments about Theora streaming
  18  * Theora video codec has two problems when it comes to streaming
  19  * and broadcasting video:
  20  *
  21  * - Large headers that need to be passed from the encoder to the decoder
  22  *   to initialize it. The conventional wisdom says we should transfer the
  23  *   headers out of band, but that complicates things with IAX, which does
  24  *   not have a separate signalling channel. Also, it makes things really
  25  *   difficult in a video conference scenario, where video gets switched
  26  *   between participants regularly. To solve this issue, we initialize
  27  *   the encoder and the decoder at the same time, using the headers from
  28  *   the local encoder to initialize the decoder. This works if the
  29  *   endpoints use the exact same version of Theora and the exact same
  30  *   parameters for initialization.
  31  *
  32  * - No support for splitting the frame into multiple slices.  Frames can
  33  *   be relatively large. For a 320x240 video stream, you can see key
  34  *   frames larger than 9KB, which is the maximum UDP packet size on Mac
  35  *   OS X. To work around this limitation, we use the slice API to fragment
  36  *   encoded frames to a reasonable size that UDP can safely transport
  37  *
  38  * Other miscellaneous comments:
  39  *
  40  * - For quality reasons, when we detect a video stream switch, we reject all
  41  *   incoming frames until we receive a key frame.
  42  *
  43  * - Theora only accepts video that has dimensions multiple of 16. If we combine
  44  *   this with a 4:3 aspect ratio requirement, we get a very limited number
  45  *   of available resolutions. To work around this limitation, we pad the video
  46  *   on encoding, up to the closest multiple of 16. On the decoding side, we
  47  *   remove the padding. This way, video resolution can be any multiple of 2
  48  *
  49  * TODO: We should probably look more into how to deal with missing and
  50  * out-of-order slices.
  51  */
  52
  53 #include <stdlib.h>
  54 #include "iaxclient_lib.h"
  55 #include "video.h"
  56 #include "slice.h"
  57 #include "codec_theora.h"
  58 #include <theora/theora.h>
  59
  60 #define MAX_SLICE_SIZE          8000
  61
  62 struct theora_decoder
  63 {
  64         theora_state            td;
  65         theora_info             ti;
  66         theora_comment          tc;
  67         struct deslicer_context *dsc;
  68         int                     got_key_frame;
  69 };
  70
  71 struct theora_encoder
  72 {
  73         theora_state          td;
  74         theora_info           ti;
  75         theora_comment        tc;
  76         int                   needs_padding;
  77         struct slicer_context *sc;
  78         unsigned char         *pad_buffer;
  79 };
  80
  81 static void destroy( struct iaxc_video_codec *c)
  82 {
  83         struct theora_encoder *e;
  84         struct theora_decoder *d;
  85
  86         if ( !c )
  87                 return;
  88
  89         if ( c->encstate )
  90         {
  91                 e = (struct theora_encoder *)c->encstate;
  92                 if ( e->pad_buffer )
  93                         free(e->pad_buffer);
  94                 if ( e->sc )
  95                         free_slicer_context(e->sc);
  96                 theora_comment_clear(&e->tc);
  97                 theora_info_clear(&e->ti);
  98                 theora_clear(&e->td);
  99                 free(e);
 100         }
 101         if ( c->decstate )
 102         {
 103                 d = (struct theora_decoder *)c->decstate;
 104                 if ( d->dsc )
 105                         free_deslicer_context(d->dsc);
 106                 theora_comment_clear(&d->tc);
 107                 theora_info_clear(&d->ti);
 108                 theora_clear(&d->td);
 109                 free(c->decstate);
 110         }
 111         free(c);
 112 }
 113
 114 static int decode(struct iaxc_video_codec *c, int inlen, const char *in,
 115                 int *outlen, char *out)
 116 {
 117         struct theora_decoder *d;
 118         ogg_packet            op;
 119         yuv_buffer            picture;
 120         unsigned int          line;
 121         int                   my_out_len;
 122         int                   w, h, ph;
 123         int                   flen;
 124         char                  *frame;
 125
 126         // Sanity checks
 127         if ( !c || !c->decstate || !in || inlen <= 0 || !out || !outlen )
 128                 return -1;
 129
 130         // Assemble slices
 131         d = (struct theora_decoder *)c->decstate;
 132         if ( !d->dsc )
 133                 return -1;
 134
 135         frame = deslice(in, inlen, &flen, d->dsc);
 136         if ( frame == NULL )
 137                 return 1;
 138
 139         /* decode into an OP structure */
 140         memset(&op, 0, sizeof(op));
 141         op.bytes = flen;
 142         op.packet = (unsigned char *)frame;
 143
 144         /* reject all incoming frames until we get a key frame */
 145         if ( !d->got_key_frame )
 146         {
 147                 if ( theora_packet_iskeyframe(&op) )
 148                         d->got_key_frame = 1;
 149                 else
 150                         return 1;
 151         }
 152
 153         if ( theora_decode_packetin(&d->td, &op) == OC_BADPACKET )
 154         {
 155                 fprintf(stderr,
 156                         "codec_theora: warning: theora_decode_packetin says bad packet\n");
 157                 return -1;
 158         }
 159
 160         w = d->ti.frame_width;
 161         h = d->ti.frame_height;
 162         ph = d->ti.height;
 163
 164         my_out_len = d->ti.frame_width * d->ti.frame_height * 3 / 2;
 165
 166         /* make sure we have enough room for the goodies */
 167         if ( *outlen < my_out_len )
 168         {
 169                 fprintf(stderr, "codec_theora: not enough room for decoding\n");
 170                 return -1;
 171         }
 172
 173         /* finally, here's where we get our goodies */
 174         if ( theora_decode_YUVout(&d->td, &picture) )
 175         {
 176                 fprintf(stderr, "codec_theora: error getting our goodies\n");
 177                 return -1;
 178         }
 179
 180         //clear output
 181         memset(out, 127, my_out_len);
 182
 183         for( line = 0 ; line < d->ti.frame_height / 2 ; line++ )
 184         {
 185                 // Y-even
 186                 memcpy(out + picture.y_width * 2 * line,
 187                        picture.y + 2 * line * picture.y_stride,
 188                        picture.y_width);
 189                 // Y-odd
 190                 memcpy(out + picture.y_width * (2 * line + 1),
 191                        picture.y + (2 * line + 1) * picture.y_stride,
 192                        picture.y_width);
 193                 // U + V
 194                 memcpy(out + (d->ti.frame_width * d->ti.frame_height) + line * d->ti.frame_width / 2,
 195                        picture.u + line * picture.uv_stride,
 196                        picture.uv_width);
 197                 memcpy(out + (d->ti.frame_width * d->ti.frame_height * 5 / 4) + line * d->ti.frame_width / 2,
 198                        picture.v + line * picture.uv_stride,
 199                        picture.uv_width);
 200         }
 201
 202         *outlen = my_out_len;
 203
 204         return 0;
 205 }
 206
 207 // Pads a w by h frame to bring it up to pw by ph size using value
 208 static void pad_channel(const char *src, int w, int h, unsigned char *dst,
 209                 int pw, int ph, unsigned char value)
 210 {
 211         int i;
 212
 213         if ( w == pw )
 214         {
 215                 // We don't need to pad each line, just copy the data
 216                 memcpy(dst, src, w * h);
 217         } else
 218         {
 219                 // We DO need to pad each line
 220                 for ( i=0 ; i<h ; i++ )
 221                 {
 222                         memcpy(&dst[i*pw], &src[i*w], w);
 223                         memset(&dst[i*pw+w], value, pw-w);
 224                 }
 225         }
 226         // Pad the bottom of the frame if necessary
 227         if ( h < ph )
 228                 memset(dst + pw * h, value, (ph - h) * pw);
 229 }
 230
 231 static int encode(struct iaxc_video_codec * c, int inlen, const char * in,
 232                 struct slice_set_t * slice_set)
 233 {
 234         struct theora_encoder   *e;
 235         ogg_packet              op;
 236         yuv_buffer              picture;
 237
 238         // Sanity checks
 239         if ( !c || !c->encstate || !in || !slice_set )
 240                 return -1;
 241
 242         e = (struct theora_encoder *)c->encstate;
 243
 244         // Prepare the YUV buffer
 245         if ( e->needs_padding )
 246         {
 247                 // We copy a padded image into the pad buffer and set up the pointers
 248                 // Use pad_channel for each of the YUV channels
 249                 // Use a pad value of 0 for luma and 128 for chroma
 250                 pad_channel(in,
 251                                 e->ti.frame_width,
 252                                 e->ti.frame_height,
 253                                 e->pad_buffer,
 254                                 e->ti.width,
 255                                 e->ti.height,
 256                                 0);
 257
 258                 pad_channel(in + e->ti.frame_width * e->ti.frame_height,
 259                                 e->ti.frame_width / 2,
 260                                 e->ti.frame_height / 2,
 261                                 e->pad_buffer + e->ti.width * e->ti.height,
 262                                 e->ti.width / 2,
 263                                 e->ti.height / 2,
 264                                 128);
 265
 266                 pad_channel(in + e->ti.frame_width * e->ti.frame_height * 5 / 4,
 267                                 e->ti.frame_width / 2,
 268                                 e->ti.frame_height / 2,
 269                                 e->pad_buffer + e->ti.width * e->ti.height * 5 / 4,
 270                                 e->ti.width / 2,
 271                                 e->ti.height / 2,
 272                                 128);
 273
 274                 picture.y = e->pad_buffer;
 275         } else
 276         {
 277                 // use the original buffer
 278                 picture.y = (unsigned char *)in;
 279         }
 280         picture.u = picture.y + e->ti.width * e->ti.height;
 281         picture.v = picture.u + e->ti.width * e->ti.height / 4;
 282         picture.y_width = e->ti.width;
 283         picture.y_height = e->ti.height;
 284         picture.y_stride = e->ti.width;
 285         picture.uv_width = e->ti.width / 2;
 286         picture.uv_height = e->ti.height / 2;
 287         picture.uv_stride = e->ti.width / 2;
 288
 289         // Send data in for encoding
 290         if ( theora_encode_YUVin(&e->td, &picture) )
 291         {
 292                 fprintf(stderr, "codec_theora: failed theora_encode_YUVin\n");
 293                 return -1;
 294         }
 295
 296         // Get data from the encoder
 297         if ( theora_encode_packetout(&e->td, 0, &op) != 1 )
 298         {
 299                 fprintf(stderr, "codec_theora: failed theora_encode_packetout\n");
 300                 return -1;
 301         }
 302
 303         // Check to see if we have a key frame
 304         slice_set->key_frame = theora_packet_iskeyframe(&op) == 1;
 305
 306         // Slice the frame
 307         slice((char *)op.packet, op.bytes, slice_set, e->sc);
 308
 309         return 0;
 310 }
 311
 312 struct iaxc_video_codec *codec_video_theora_new(int format, int w, int h,
 313                 int framerate, int bitrate, int fragsize)
 314 {
 315         struct iaxc_video_codec *c;
 316         struct theora_encoder   *e;
 317         struct theora_decoder   *d;
 318         unsigned short          source_id;
 319         ogg_packet              headerp, commentp, tablep;
 320
 321         /* Basic sanity checks */
 322         if ( w <= 0 || h <= 0 || framerate <= 0 || bitrate <= 0 || fragsize <= 0 )
 323         {
 324                 fprintf(stderr, "codec_theora: bogus codec params: %d %d %d %d %d\n",
 325                                 w, h, framerate, bitrate, fragsize);
 326                 return NULL;
 327         }
 328
 329         if ( w % 2 || h % 2 )
 330         {
 331                 fprintf(stderr, "codec_theora: video dimensions must be multiples of 2\n");
 332                 return NULL;
 333         }
 334
 335         if ( fragsize > MAX_SLICE_SIZE )
 336                 fragsize = MAX_SLICE_SIZE;
 337
 338         c = (struct iaxc_video_codec *)calloc(sizeof(struct iaxc_video_codec), 1);
 339
 340         if ( !c )
 341                 goto bail;
 342
 343         c->decstate = calloc(sizeof(struct theora_decoder), 1);
 344
 345         if ( !c->decstate )
 346                 goto bail;
 347
 348         c->encstate = calloc(sizeof(struct theora_encoder), 1);
 349
 350         if ( !c->encstate )
 351                 goto bail;
 352
 353         c->format = format;
 354         c->width = w;
 355         c->height = h;
 356         c->framerate = framerate;
 357         c->bitrate = bitrate;
 358         c->fragsize = fragsize;
 359
 360         c->encode = encode;
 361         c->decode = decode;
 362         c->destroy = destroy;
 363
 364         e = (struct theora_encoder *)c->encstate;
 365         d = (struct theora_decoder *)c->decstate;
 366
 367         // Initialize slicer
 368         // Generate random source id
 369         srand((unsigned int)time(0));
 370         source_id = rand() & 0xffff;
 371         e->sc = create_slicer_context(source_id, fragsize);
 372         if ( !e->sc )
 373                 goto bail;
 374
 375
 376         /* set up some parameters in the contexts */
 377
 378         theora_info_init(&e->ti);
 379
 380         /* set up common parameters */
 381         e->ti.frame_width = w;
 382         e->ti.frame_height = h;
 383         e->ti.width = ((w - 1) / 16 + 1) * 16;
 384         e->ti.height = ((h - 1) / 16 + 1) * 16;
 385         e->ti.offset_x = 0;
 386         e->ti.offset_y = 0;
 387
 388         // We set up a padded frame with dimensions that are multiple of 16
 389         // We allocate a buffer to hold this frame
 390         e->needs_padding = e->ti.width != e->ti.frame_width ||
 391                 e->ti.height != e->ti.frame_height;
 392
 393         if ( e->needs_padding )
 394         {
 395                 e->pad_buffer = (unsigned char *)
 396                         malloc(e->ti.width * e->ti.height * 3 / 2);
 397
 398                 if ( !e->pad_buffer )
 399                         goto bail;
 400         }
 401         else
 402         {
 403                 e->pad_buffer = 0;
 404         }
 405
 406         e->ti.fps_numerator = framerate;
 407         e->ti.fps_denominator = 1;
 408
 409         e->ti.aspect_numerator = 1;
 410         e->ti.aspect_denominator = 1;
 411
 412         e->ti.colorspace = OC_CS_UNSPECIFIED;
 413         e->ti.pixelformat = OC_PF_420;
 414
 415         e->ti.target_bitrate = bitrate;
 416
 417         e->ti.quality = 0;
 418
 419         e->ti.dropframes_p = 0;
 420         e->ti.quick_p = 1;
 421         e->ti.keyframe_auto_p = 0;
 422         e->ti.keyframe_frequency = framerate;
 423         e->ti.keyframe_frequency_force = framerate;
 424         e->ti.keyframe_data_target_bitrate = bitrate * 3;
 425         e->ti.keyframe_auto_threshold = 80;
 426         e->ti.keyframe_mindistance = 8;
 427         e->ti.noise_sensitivity = 0;
 428
 429         if ( theora_encode_init(&e->td, &e->ti) )
 430                 goto bail;
 431
 432         // Obtain the encoder headers and set up the decoder headers from
 433         // data in the encoder headers
 434         memset(&headerp, 0, sizeof(headerp));
 435         memset(&commentp, 0, sizeof(commentp));
 436         memset(&tablep, 0, sizeof(tablep));
 437
 438         // Set up the decoder using the encoder headers
 439         theora_info_init(&d->ti);
 440         theora_comment_init(&d->tc);
 441         theora_comment_init(&e->tc);
 442
 443         if ( theora_encode_header(&e->td, &headerp) )
 444                 goto bail;
 445
 446         headerp.b_o_s = 1;
 447
 448         if ( theora_decode_header(&d->ti, &d->tc, &headerp) )
 449                 goto bail;
 450
 451         if ( theora_encode_comment(&e->tc, &commentp) )
 452                 goto bail;
 453
 454         if ( theora_decode_header(&d->ti, &d->tc, &commentp) )
 455                 goto bail;
 456
 457         theora_comment_clear(&e->tc);
 458
 459         if ( theora_encode_tables(&e->td, &tablep) )
 460                 goto bail;
 461
 462         if ( theora_decode_header(&d->ti, &d->tc, &tablep) )
 463                 goto bail;
 464
 465         if ( theora_decode_init(&d->td, &d->ti) )
 466                 goto bail;
 467
 468         d->got_key_frame = 0;
 469
 470         // Initialize deslicer context
 471         d->dsc = create_deslicer_context(c->fragsize);
 472         if ( !d->dsc )
 473                 goto bail;
 474
 475         strcpy(c->name, "Theora");
 476         return c;
 477
 478 bail:
 479         fprintf(stderr, "codec_theora: failed to initialize encoder or decoder\n");
 480
 481         if ( c )
 482         {
 483                 if ( c->encstate )
 484                 {
 485                         e = (struct theora_encoder *)c->encstate;
 486                         if ( e->sc )
 487                                 free_slicer_context(e->sc);
 488                         free(c->encstate);
 489                 }
 490                 if ( c->decstate )
 491                 {
 492                         d = (struct theora_decoder *)c->decstate;
 493                         if ( d->dsc )
 494                                 free_deslicer_context(d->dsc);
 495                         free(c->decstate);
 496                 }
 497                 free(c);
 498         }
 499
 500         return NULL;
 501 }
 502