[MPlayer-dev-eng] [RFC] Speex support

Wed Nov 2 22:40:01 CET 2005

Hi,
On Tue, Nov 01, 2005 at 06:00:16PM +0100, Reimar Döffinger wrote:
> the attached patch adds support for speex decoding. Please test, esp.
> the configure part.
> Known bugs:
> 1) the ogg demuxer does not create valid timestamps. I do not intend to
> fix that (flac in ogg has the same problem).
> 2) speex files with more than one frame per ogg packet will not work
> (I do not yet know what is needed to fix that).

Fixed 2) and also fixed stereo files (stereo in a speech codec... I sure
have difficulty understanding the sense in this, but well...).
I do not intend to fix 1) and a related problem that the first, comment
packet isn't discarded. Though I sure would be happy if someone familiar
with demux_ogg would fix these.

Greetings,
Reimar Döffinger
-------------- next part --------------
Index: Makefile
===================================================================
RCS file: /cvsroot/mplayer/main/Makefile,v
retrieving revision 1.330
diff -u -r1.330 Makefile

--- Makefile	19 Aug 2005 19:24:30 -0000	1.330
+++ Makefile	2 Nov 2005 21:34:32 -0000
@@ -104,6 +104,7 @@
              $(XMMS_LIB) \
              $(X264_LIB) \
              $(MUSEPACK_LIB) \
+             $(SPEEX_LIB) \
 
 COMMON_LIBS = libmpcodecs/libmpcodecs.a \
               $(W32_LIB) \
Index: configure
===================================================================
RCS file: /cvsroot/mplayer/main/configure,v
retrieving revision 1.1096
diff -u -r1.1096 configure
--- configure	26 Oct 2005 20:40:19 -0000	1.1096
+++ configure	2 Nov 2005 21:34:49 -0000
@@ -243,6 +243,7 @@
   --enable-tremor-low    build with lower accuracy internal tremor [disabled]
   --enable-external-tremor build with external tremor [disabled]
   --disable-vorbis       disable OggVorbis support entirely [autodetect]
+  --disable-speex        disable Speex support entirely [autodetect]
   --enable-theora        build with OggTheora support [autodetect]
   --disable-internal-matroska disable internal Matroska support [enabled]
   --enable-external-faad build with external FAAD2 (AAC) support [autodetect]
@@ -1486,6 +1487,7 @@
 _tremor_internal=yes
 _tremor_low=no
 _vorbis=auto
+_speex=auto
 _theora=auto
 _mp3lib=yes
 _liba52=yes
@@ -1679,6 +1681,8 @@
   --disable-liblzo)	_liblzo=no		;;
   --enable-vorbis)	_vorbis=yes	;;
   --disable-vorbis)	_vorbis=no	;;
+  --enable-speex)	_speex=yes	;;
+  --disable-speex)	_speex=no	;;
   --enable-internal-tremor)	_tremor_internal=yes	;;
   --disable-internal-tremor)	_tremor_internal=no	;;
   --enable-tremor-low)	_tremor_low=yes	;;
@@ -5389,6 +5393,29 @@
 fi
 echores "$_vorbis"
 
+echocheck "libspeex (version >= 1.1 required)" # probe only runs when _speex is still "auto"
+if test "$_speex" = auto ; then
+  _speex=no # stays "no" unless the cc_check below succeeds
+  cat > $TMPC << EOF
+#include <speex/speex.h>
+int main(void) {
+  SpeexBits bits;
+  void *dec;
+  speex_decode_int(dec, &bits, dec);
+}
+EOF
+  cc_check -lspeex $_ld_lm && _speex=yes # presumably builds $TMPC against libspeex
+fi
+if test "$_speex" = yes ; then # also reached directly with --enable-speex
+  _def_speex='#define HAVE_SPEEX 1' # emitted under "enable Speex support" further down
+  _ld_speex='-lspeex' # becomes SPEEX_LIB in the generated make variables
+  _codecmodules="speex $_codecmodules"
+else
+  _def_speex='#undef HAVE_SPEEX'
+  _nocodecmodules="speex $_nocodecmodules"
+fi
+echores "$_speex"
+
 echocheck "OggTheora support"
 if test "$_theora" = auto ; then
   _theora=no
@@ -6977,6 +7004,7 @@
 TREMOR = $_tremor_internal
 TREMOR_FLAGS = $_tremor_flags
 
+SPEEX = $_speex
 MUSEPACK = $_musepack
 
 UNRARLIB = $_unrarlib
@@ -7086,6 +7114,7 @@
 LIBLZO_LIB= $_ld_liblzo
 MAD_LIB = $_ld_mad
 VORBIS_LIB = $_ld_vorbis $_ld_libdv
+SPEEX_LIB = $_ld_speex
 THEORA_LIB = $_ld_theora
 FAAD_LIB = $_ld_faad
 INTERNAL_FAAD = $_faad_internal
@@ -7664,6 +7693,9 @@
 /* enable Tremor as vorbis decoder */
 $_def_tremor
 
+/* enable Speex support */
+$_def_speex
+
 /* enable musepack support */
 $_def_musepack
 
Index: etc/codecs.conf
===================================================================
RCS file: /cvsroot/mplayer/main/etc/codecs.conf,v
retrieving revision 1.436
diff -u -r1.436 codecs.conf
--- etc/codecs.conf	22 Oct 2005 13:53:18 -0000	1.436
+++ etc/codecs.conf	2 Nov 2005 21:34:54 -0000
@@ -2628,6 +2628,15 @@
 ;  driver acm
 ;  dll "vorbis.acm"
 
+audiocodec speex
+  info "Speex Audio Decoder"
+  status working
+  comment "Speex driver using libspeex"
+  fourcc 'spx '
+  format 0x9999
+  driver speex
+  dll "libspeex"
+
 audiocodec vivoaudio
   info "Vivo G.723/Siren Audio Codec"
   status working
Index: libmpcodecs/Makefile
===================================================================
RCS file: /cvsroot/mplayer/main/libmpcodecs/Makefile,v
retrieving revision 1.151
diff -u -r1.151 Makefile
--- libmpcodecs/Makefile	19 Sep 2005 15:23:15 -0000	1.151
+++ libmpcodecs/Makefile	2 Nov 2005 21:34:56 -0000
@@ -209,6 +209,10 @@
 AUDIO_SRCS += ad_mpc.c
 endif
 
+ifeq ($(SPEEX),yes)
+AUDIO_SRCS += ad_speex.c
+endif
+
 ifeq ($(FAAC),yes)
 ENCODER_SRCS += ae_faac.c
 endif
Index: libmpcodecs/ad.c
===================================================================
RCS file: /cvsroot/mplayer/main/libmpcodecs/ad.c,v
retrieving revision 1.21
diff -u -r1.21 ad.c
--- libmpcodecs/ad.c	10 Jul 2005 17:14:11 -0000	1.21
+++ libmpcodecs/ad.c	2 Nov 2005 21:34:56 -0000
@@ -33,6 +33,7 @@
 extern ad_functions_t mpcodecs_ad_msgsm;
 extern ad_functions_t mpcodecs_ad_faad;
 extern ad_functions_t mpcodecs_ad_libvorbis;
+extern ad_functions_t mpcodecs_ad_libspeex;
 extern ad_functions_t mpcodecs_ad_libmad;
 extern ad_functions_t mpcodecs_ad_realaud;
 extern ad_functions_t mpcodecs_ad_libdv;
@@ -78,6 +79,9 @@
 #ifdef HAVE_OGGVORBIS
   &mpcodecs_ad_libvorbis,
 #endif
+#ifdef HAVE_SPEEX
+  &mpcodecs_ad_libspeex,
+#endif
 #ifdef USE_LIBMAD
   &mpcodecs_ad_libmad,
 #endif
Index: libmpcodecs/ad_speex.c
===================================================================
RCS file: libmpcodecs/ad_speex.c
diff -N libmpcodecs/ad_speex.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ libmpcodecs/ad_speex.c	2 Nov 2005 21:34:56 -0000
@@ -0,0 +1,113 @@
+/**
+ * Speex decoder by Reimar Döffinger <Reimar.Doeffinger at stud.uni-karlsruhe.de>
+ * License: GPL
+ * This code may be relicensed under the terms of the GNU LGPL when it
+ * becomes part of the FFmpeg project (ffmpeg.org)
+ */
+#include "config.h"
+#include <speex/speex.h>
+#include <speex/speex_stereo.h>
+#include <speex/speex_header.h>
+#include "ad_internal.h"
+
+static ad_info_t info = { // descriptive strings for this decoder
+  "Speex audio decoder",
+  "speex",
+  "Reimar Döffinger",
+  "",
+  ""
+};
+
+LIBAD_EXTERN(libspeex) // presumably defines mpcodecs_ad_libspeex, which ad.c declares extern - confirm in ad_internal.h
+
+typedef struct { // per-stream decoder state, stored in sh->context by init()
+  SpeexBits bits;          // bit buffer; loaded from each packet in decode_audio()
+  void *dec_context;       // decoder handle returned by speex_decoder_init()
+  SpeexStereoState stereo; // state handed to speex_decode_stereo_int() for 2 channels
+  SpeexHeader *hdr;        // header from speex_packet_to_header(); speex_free()d in uninit()
+} context_t;
+
+static int preinit(sh_audio_t *sh) {
+  return 1; // no work is done here; sh is not touched
+}
+
+/* Create the decoder from the Speex header behind sh->wf; 1 = ok, 0 = failure. */
+static int init(sh_audio_t *sh) {
+  context_t *ctx = NULL;
+  const SpeexMode *spx_mode;
+  const SpeexStereoState st_st = SPEEX_STEREO_STATE_INIT; // hack
+  if (sh->wf && sh->wf->cbSize >= 80)
+    ctx = calloc(1, sizeof(*ctx)); // only with extradata present, so no leak
+  if (ctx) ctx->hdr = speex_packet_to_header((char *)&sh->wf[1], sh->wf->cbSize);
+  if (!ctx || !ctx->hdr) { // no extradata, out of memory or unparsable header
+    mp_msg(MSGT_DECAUDIO, MSGL_FATAL, "Missing extradata!\n");
+    free(ctx);
+    return 0;
+  }
+  if (ctx->hdr->nb_channels != 1 && ctx->hdr->nb_channels != 2) {
+    mp_msg(MSGT_DECAUDIO, MSGL_WARN, "Invalid number of channels (%i), "
+            "assuming mono\n", ctx->hdr->nb_channels);
+    ctx->hdr->nb_channels = 1;
+  }
+  switch (ctx->hdr->mode) {
+    case 0: spx_mode = &speex_nb_mode; break;
+    case 1: spx_mode = &speex_wb_mode; break;
+    case 2: spx_mode = &speex_uwb_mode; break;
+    default: // report the mode read from the header, then fall back to narrowband
+      mp_msg(MSGT_DECAUDIO, MSGL_WARN, "Unknown speex mode (%i)\n", ctx->hdr->mode);
+      spx_mode = &speex_nb_mode;
+  }
+  ctx->dec_context = speex_decoder_init(spx_mode);
+  speex_bits_init(&ctx->bits);
+  ctx->stereo = st_st; // hack part 2: copy the initializer-built state
+  sh->channels = ctx->hdr->nb_channels;
+  sh->samplerate = ctx->hdr->rate;
+  sh->samplesize = 2;
+  sh->sample_format = AF_FORMAT_S16_NE;
+  sh->context = ctx;
+  return 1;
+}
+
+/* Free the decoder state held in sh->context (if any) and clear the pointer. */
+static void uninit(sh_audio_t *sh) {
+  context_t *ctx = sh->context;
+  if (ctx) {
+    speex_bits_destroy(&ctx->bits);
+    speex_decoder_destroy(ctx->dec_context);
+    if (ctx->hdr) speex_free(ctx->hdr);
+    free(ctx);
+  }
+  sh->context = NULL; // reset the stored pointer so it cannot dangle after free
+}
+
+/* Decode all Speex frames of the next demuxer packet into buf (16-bit samples).
+ * Returns bytes written, or -1 if buf is too small or no packet was read. */
+static int decode_audio(sh_audio_t *sh, unsigned char *buf,
+                        int minlen, int maxlen) {
+  context_t *ctx = sh->context;
+  int len, framelen, framesamples, i;
+  char *packet;
+  // at least one frame is always decoded, so a header value <= 0 counts as 1
+  int frames = ctx->hdr->frames_per_packet > 0 ? ctx->hdr->frames_per_packet : 1;
+  speex_decoder_ctl(ctx->dec_context, SPEEX_GET_FRAME_SIZE, &framesamples);
+  framelen = framesamples * ctx->hdr->nb_channels * sizeof(short);
+  if (maxlen < frames * framelen) {
+    mp_msg(MSGT_DECAUDIO, MSGL_V, "maxlen too small in decode_audio\n");
+    return -1;
+  }
+  len = ds_get_packet(sh->ds, (unsigned char **)&packet);
+  if (len <= 0) return -1;
+  speex_bits_read_from(&ctx->bits, packet, len);
+  for (i = 0; i < frames; i++, buf += framelen) {
+    if (speex_decode_int(ctx->dec_context, &ctx->bits, (short *)buf) == -2)
+      mp_msg(MSGT_DECAUDIO, MSGL_ERR, "Error decoding file.\n");
+    if (ctx->hdr->nb_channels == 2)
+      speex_decode_stereo_int((short *)buf, framesamples, &ctx->stereo);
+  }
+  return frames * framelen;
+}
+
+static int control(sh_audio_t *sh, int cmd, void *arg, ...) {
+  return CONTROL_UNKNOWN; // every cmd gets the same answer; no arguments are inspected
+}
+
Index: libmpdemux/demux_ogg.c
===================================================================
RCS file: /cvsroot/mplayer/main/libmpdemux/demux_ogg.c,v
retrieving revision 1.82
diff -u -r1.82 demux_ogg.c
--- libmpdemux/demux_ogg.c	1 Nov 2005 16:12:53 -0000	1.82
+++ libmpdemux/demux_ogg.c	2 Nov 2005 21:35:01 -0000
@@ -16,6 +16,7 @@
 #include "stheader.h"
 
 #define FOURCC_VORBIS mmioFOURCC('v', 'r', 'b', 's')
+#define FOURCC_SPEEX  mmioFOURCC('s', 'p', 'x', ' ')
 #define FOURCC_THEORA mmioFOURCC('t', 'h', 'e', 'o')
 
 #ifdef TREMOR
@@ -116,6 +117,7 @@
   ogg_stream_state stream;
   int hdr_packets;
   int vorbis;
+  int speex;
   int theora;
   int flac;
   int text;
@@ -352,6 +354,8 @@
        os->lastsize = blocksize;
        os->lastpos = pack->granulepos;
     }
+  } else if (os->speex) {
+    data = pack->packet;
 # ifdef HAVE_OGGTHEORA
   } else if (os->theora) {
      /* we pass complete packets to theora, mustn't strip the header! */
@@ -540,7 +544,7 @@
   // (PACKET_TYPE_HEADER bit doesn't even exist for theora ?!)
   // We jump nothing for FLAC. Ain't this great? Packet contents have to be
   // handled differently for each and every stream type. The joy! The joy!
-  if(!os->flac && ((*pack->packet & PACKET_TYPE_HEADER) && 
+  if(!os->flac && !os->speex && ((*pack->packet & PACKET_TYPE_HEADER) && 
      (ds != d->audio || ( ((sh_audio_t*)ds->sh)->format != FOURCC_VORBIS || os->hdr_packets >= NUM_VORBIS_HDR_PACKETS ) ) &&
      (ds != d->video || (((sh_video_t*)ds->sh)->format != FOURCC_THEORA))))
     return 0;
@@ -916,6 +920,27 @@
       ogg_d->subs[ogg_d->num_sub].id = n_audio;
       n_audio++;
       mp_msg(MSGT_DEMUX,MSGL_INFO,"[Ogg] stream %d: audio (Vorbis), -aid %d\n",ogg_d->num_sub,n_audio-1);
+    } else if (pack.bytes >= 80 && !strncmp(pack.packet,"Speex", 5)) {
+      sh_a = new_sh_audio(demuxer, ogg_d->num_sub);
+      sh_a->wf = (WAVEFORMATEX*)calloc(1, sizeof(WAVEFORMATEX) + pack.bytes);
+      sh_a->format = FOURCC_SPEEX;
+      sh_a->samplerate = sh_a->wf->nSamplesPerSec = get_uint32(&pack.packet[36]);
+      sh_a->channels = sh_a->wf->nChannels = get_uint32(&pack.packet[48]);
+      sh_a->wf->wFormatTag = sh_a->format;
+      sh_a->wf->nAvgBytesPerSec = get_uint32(&pack.packet[52]);
+      sh_a->wf->nBlockAlign = 0;
+      sh_a->wf->wBitsPerSample = 16;
+      sh_a->samplesize = 2;
+      sh_a->wf->cbSize = pack.bytes;
+      memcpy(&sh_a->wf[1], pack.packet, pack.bytes);
+
+      ogg_d->subs[ogg_d->num_sub].samplerate = sh_a->samplerate;
+      ogg_d->subs[ogg_d->num_sub].speex = 1;
+      if (identify)
+        mp_msg(MSGT_GLOBAL, MSGL_INFO, "ID_AUDIO_ID=%d\n", n_audio);
+      ogg_d->subs[ogg_d->num_sub].id = n_audio;
+      n_audio++;
+      mp_msg(MSGT_DEMUX,MSGL_INFO,"[Ogg] stream %d: audio (Speex), -aid %d\n",ogg_d->num_sub,n_audio-1);
 
       // check for Theora
 #   ifdef HAVE_OGGTHEORA
@@ -1549,7 +1574,7 @@
           break;
         }
       }
-      if(!precision && (is_keyframe || os->vorbis) ) {
+      if(!precision && (is_keyframe || os->vorbis || os->speex) ) {
         ogg_sub.lines = 0;
         vo_sub = &ogg_sub;
         vo_osd_changed(OSDTYPE_SUBTITLE);