Surprisingly, I was not able to find a simple code sample other the Internet that explain how to play a mp3 with Windows API, without using the too simple Windows Media Player API. Why WMP is not enough (not even talking about MCI - Media Control Interface which is even more basic than WMP)?
Well, It's lacking one important feature : It's only able to play from an url, so it's not possible for example to pack the song in an archive and play it from a memory location (although not a huge deal if you want to release the song on the side of your demo). Also I have never tested the timing returned by WMP (using probably IWMPControls3 getPositionTimeCode) and not really sure It's able to provide a reliable sync (at least, If you intend to use sync... but hey, is a demo without any sync, can be still called a demo?:)
So I started to find some peace of code around the net but they were covering only part of the problem. The starting point was to rely on the Audio Compression Manager API that provides an API conversion to perform for example a mp3 to pcm. Hopefully, I found the code from a guy that was kind enough to post the whole converter for a mp3 file using ACM. In the mean time, I found that Mark Heath, the author of NAudio posted few days ago a solution to convert a MP3 to WAV using NAudio. Looking at his code, he was using also ACM but he reported also some difficulties to implement a reliable MP3Frame/ID3Tag decoder in order to extract samplerate, bitrate, channels...etc. I didn't want to use this kind of heavy code and was looking a lighter and reliable solution for this : most of the people were talking about using the Windows Media Format SDK to get all this information from the file. The starting point is the WMCreateSyncReader method. Through this method, you are able to retrieve part of MP3Frame as well as ID3Tag.
Finally, I came with a patchwork solution :
- using SyncReader from WMF to extract song duration.
- using ACM to decode the mp3 to pcm
- using plain old waveOut functions to perform sound playback and retrieve sound playback position.
Also, the code is not decoding&playing in realtime the song but is instead performing the decoding in a single pass and then playing the decoded buffer. This requires that the full pcm song to be allocated, which could be around 20Mo to 50Mo depending on the size of your song (It's easy to calculate : durationInSecondsOfTheSong * 4 * 441000, so a 3min song is requiring 30Mo). This is not probably the best solution, but It's not a huge task to transform this code to do realtime decoding/playback. The downside is that It will take some CPU in your demo. So that in the end, It's a just tradeoff between memory vs cpu depending on your needs!
/* ----------------------------------------------------------------------
* MP3Player.h C++ class using plain Windows API
*
* Author: @lx/Alexandre Mutel, blog: http://xoofx.github.io/blog
* The software is provided "as is", without warranty of any kind.
* ----------------------------------------------------------------------*/
#pragma once
#include <windows.h>
#include <stdio.h>
#include <assert.h>
#include <mmreg.h>
#include <msacm.h>
#include <wmsdk.h>
#pragma comment(lib, "msacm32.lib")
#pragma comment(lib, "wmvcore.lib")
#pragma comment(lib, "winmm.lib")
#pragma intrinsic(memset,memcpy,memcmp)
#ifdef _DEBUG
#define mp3Assert(function) assert((function) == 0)
#else
//#define mp3Assert(function) if ( (function) != 0 ) { MessageBoxA(NULL,"Error in [ " #function "]", "Error",MB_OK); ExitProcess(0); }
#define mp3Assert(function) (function)
#endif
/*
* MP3Player class.
* Usage :
* MP3Player player;
* player.OpenFromFile("your.mp3");
* player.Play();
* Sleep((DWORD)(player.GetDuration()+1));
* player.Close();
*/
class MP3Player {
private:
HWAVEOUT hWaveOut;
DWORD bufferLength;
double durationInSecond;
BYTE* soundBuffer;
public:
/*
* OpenFromFile : loads a MP3 file and convert it internaly to a PCM format, ready for sound playback.
*/
HRESULT OpenFromFile(TCHAR* inputFileName){
// Open the mp3 file
HANDLE hFile = CreateFile(inputFileName, // open MYFILE.TXT
GENERIC_READ,
FILE_SHARE_READ, // share for reading
NULL, // no security
OPEN_EXISTING, // existing file only
FILE_ATTRIBUTE_NORMAL, // normal file
NULL); // no attr
assert( hFile != INVALID_HANDLE_VALUE);
// Get FileSize
DWORD fileSize = GetFileSize(hFile, NULL);
assert( fileSize != INVALID_FILE_SIZE);
// Alloc buffer for file
BYTE* mp3Buffer = (BYTE*)LocalAlloc(LPTR, fileSize);
// Read file and fill mp3Buffer
DWORD bytesRead;
DWORD resultReadFile = ReadFile( hFile, mp3Buffer, fileSize, &bytesRead, NULL);
assert(resultReadFile != 0);
assert( bytesRead == fileSize);
// Close File
CloseHandle(hFile);
// Open and convert MP3
HRESULT hr = OpenFromMemory(mp3Buffer, fileSize);
// Free mp3Buffer
LocalFree(mp3Buffer);
return hr;
}
/*
* OpenFromMemory : loads a MP3 from memory and convert it internaly to a PCM format, ready for sound playback.
*/
HRESULT OpenFromMemory(BYTE* mp3InputBuffer, DWORD mp3InputBufferSize){
IWMSyncReader* wmSyncReader;
IWMHeaderInfo* wmHeaderInfo;
IWMProfile* wmProfile;
IWMStreamConfig* wmStreamConfig;
IWMMediaProps* wmMediaProperties;
WORD wmStreamNum = 0;
WMT_ATTR_DATATYPE wmAttrDataType;
DWORD durationInSecondInt;
QWORD durationInNano;
DWORD sizeMediaType;
DWORD maxFormatSize = 0;
HACMSTREAM acmMp3stream = NULL;
HGLOBAL mp3HGlobal;
IStream* mp3Stream;
// Define output format
static WAVEFORMATEX pcmFormat = {
WAVE_FORMAT_PCM, // WORD wFormatTag; /* format type */
2, // WORD nChannels; /* number of channels (i.e. mono, stereo...) */
44100, // DWORD nSamplesPerSec; /* sample rate */
4 * 44100, // DWORD nAvgBytesPerSec; /* for buffer estimation */
4, // WORD nBlockAlign; /* block size of data */
16, // WORD wBitsPerSample; /* number of bits per sample of mono data */
0, // WORD cbSize; /* the count in bytes of the size of */
};
const DWORD MP3_BLOCK_SIZE = 522;
// Define input format
static MPEGLAYER3WAVEFORMAT mp3Format = {
{
WAVE_FORMAT_MPEGLAYER3, // WORD wFormatTag; /* format type */
2, // WORD nChannels; /* number of channels (i.e. mono, stereo...) */
44100, // DWORD nSamplesPerSec; /* sample rate */
128 * (1024 / 8), // DWORD nAvgBytesPerSec; not really used but must be one of 64, 96, 112, 128, 160kbps
1, // WORD nBlockAlign; /* block size of data */
0, // WORD wBitsPerSample; /* number of bits per sample of mono data */
MPEGLAYER3_WFX_EXTRA_BYTES, // WORD cbSize;
},
MPEGLAYER3_ID_MPEG, // WORD wID;
MPEGLAYER3_FLAG_PADDING_OFF, // DWORD fdwFlags;
MP3_BLOCK_SIZE, // WORD nBlockSize;
1, // WORD nFramesPerBlock;
1393, // WORD nCodecDelay;
};
// -----------------------------------------------------------------------------------
// Extract and verify mp3 info : duration, type = mp3, sampleRate = 44100, channels = 2
// -----------------------------------------------------------------------------------
// Initialize COM
CoInitialize(0);
// Create SyncReader
mp3Assert( WMCreateSyncReader( NULL, WMT_RIGHT_PLAYBACK , &wmSyncReader ) );
// Alloc With global and create IStream
mp3HGlobal = GlobalAlloc(GPTR, mp3InputBufferSize);
assert(mp3HGlobal != 0);
void* mp3HGlobalBuffer = GlobalLock(mp3HGlobal);
memcpy(mp3HGlobalBuffer, mp3InputBuffer, mp3InputBufferSize);
GlobalUnlock(mp3HGlobal);
mp3Assert( CreateStreamOnHGlobal(mp3HGlobal, FALSE, &mp3Stream) );
// Open MP3 Stream
mp3Assert( wmSyncReader->OpenStream(mp3Stream) );
// Get HeaderInfo interface
mp3Assert( wmSyncReader->QueryInterface(&wmHeaderInfo) );
// Retrieve mp3 song duration in seconds
WORD lengthDataType = sizeof(QWORD);
mp3Assert( wmHeaderInfo->GetAttributeByName(&wmStreamNum, L"Duration", &wmAttrDataType, (BYTE*)&durationInNano, &lengthDataType ) );
durationInSecond = ((double)durationInNano)/10000000.0;
durationInSecondInt = (int)(durationInNano/10000000)+1;
// Sequence of call to get the MediaType
// WAVEFORMATEX for mp3 can then be extract from MediaType
mp3Assert( wmSyncReader->QueryInterface(&wmProfile) );
mp3Assert( wmProfile->GetStream(0, &wmStreamConfig) );
mp3Assert( wmStreamConfig->QueryInterface(&wmMediaProperties) );
// Retrieve sizeof MediaType
mp3Assert( wmMediaProperties->GetMediaType(NULL, &sizeMediaType) );
// Retrieve MediaType
WM_MEDIA_TYPE* mediaType = (WM_MEDIA_TYPE*)LocalAlloc(LPTR,sizeMediaType);
mp3Assert( wmMediaProperties->GetMediaType(mediaType, &sizeMediaType) );
// Check that MediaType is audio
assert(mediaType->majortype == WMMEDIATYPE_Audio);
// assert(mediaType->pbFormat == WMFORMAT_WaveFormatEx);
// Check that input is mp3
WAVEFORMATEX* inputFormat = (WAVEFORMATEX*)mediaType->pbFormat;
assert( inputFormat->wFormatTag == WAVE_FORMAT_MPEGLAYER3);
assert( inputFormat->nSamplesPerSec == 44100);
assert( inputFormat->nChannels == 2);
// Release COM interface
// wmSyncReader->Close();
wmMediaProperties->Release();
wmStreamConfig->Release();
wmProfile->Release();
wmHeaderInfo->Release();
wmSyncReader->Release();
// Free allocated mem
LocalFree(mediaType);
// -----------------------------------------------------------------------------------
// Convert mp3 to pcm using acm driver
// The following code is mainly inspired from http://david.weekly.org/code/mp3acm.html
// -----------------------------------------------------------------------------------
// Get maximum FormatSize for all acm
mp3Assert( acmMetrics( NULL, ACM_METRIC_MAX_SIZE_FORMAT, &maxFormatSize ) );
// Allocate PCM output sound buffer
bufferLength = durationInSecond * pcmFormat.nAvgBytesPerSec;
soundBuffer = (BYTE*)LocalAlloc(LPTR, bufferLength);
acmMp3stream = NULL;
switch ( acmStreamOpen( &acmMp3stream, // Open an ACM conversion stream
NULL, // Query all ACM drivers
(LPWAVEFORMATEX)&mp3Format, // input format : mp3
&pcmFormat, // output format : pcm
NULL, // No filters
0, // No async callback
0, // No data for callback
0 // No flags
)
) {
case MMSYSERR_NOERROR:
break; // success!
case MMSYSERR_INVALPARAM:
assert( !"Invalid parameters passed to acmStreamOpen" );
return E_FAIL;
case ACMERR_NOTPOSSIBLE:
assert( !"No ACM filter found capable of decoding MP3" );
return E_FAIL;
default:
assert( !"Some error opening ACM decoding stream!" );
return E_FAIL;
}
// Determine output decompressed buffer size
unsigned long rawbufsize = 0;
mp3Assert( acmStreamSize( acmMp3stream, MP3_BLOCK_SIZE, &rawbufsize, ACM_STREAMSIZEF_SOURCE ) );
assert( rawbufsize > 0 );
// allocate our I/O buffers
static BYTE mp3BlockBuffer[MP3_BLOCK_SIZE];
//LPBYTE mp3BlockBuffer = (LPBYTE) LocalAlloc( LPTR, MP3_BLOCK_SIZE );
LPBYTE rawbuf = (LPBYTE) LocalAlloc( LPTR, rawbufsize );
// prepare the decoder
static ACMSTREAMHEADER mp3streamHead;
// memset( &mp3streamHead, 0, sizeof(ACMSTREAMHEADER ) );
mp3streamHead.cbStruct = sizeof(ACMSTREAMHEADER );
mp3streamHead.pbSrc = mp3BlockBuffer;
mp3streamHead.cbSrcLength = MP3_BLOCK_SIZE;
mp3streamHead.pbDst = rawbuf;
mp3streamHead.cbDstLength = rawbufsize;
mp3Assert( acmStreamPrepareHeader( acmMp3stream, &mp3streamHead, 0 ) );
BYTE* currentOutput = soundBuffer;
DWORD totalDecompressedSize = 0;
static ULARGE_INTEGER newPosition;
static LARGE_INTEGER seekValue;
mp3Assert( mp3Stream->Seek(seekValue, STREAM_SEEK_SET, &newPosition) );
while(1) {
// suck in some MP3 data
ULONG count;
mp3Assert( mp3Stream->Read(mp3BlockBuffer, MP3_BLOCK_SIZE, &count) );
if( count != MP3_BLOCK_SIZE )
break;
// convert the data
mp3Assert( acmStreamConvert( acmMp3stream, &mp3streamHead, ACM_STREAMCONVERTF_BLOCKALIGN ) );
// write the decoded PCM to disk
//count = fwrite( rawbuf, 1, mp3streamHead.cbDstLengthUsed, fpOut );
memcpy(currentOutput, rawbuf, mp3streamHead.cbDstLengthUsed);
totalDecompressedSize += mp3streamHead.cbDstLengthUsed;
currentOutput += mp3streamHead.cbDstLengthUsed;
};
mp3Assert( acmStreamUnprepareHeader( acmMp3stream, &mp3streamHead, 0 ) );
LocalFree(rawbuf);
mp3Assert( acmStreamClose( acmMp3stream, 0 ) );
// Release allocated memory
mp3Stream->Release();
GlobalFree(mp3HGlobal);
return S_OK;
}
/*
* Close : close the current MP3Player, stop playback and free allocated memory
*/
void __inline Close() {
// Reset before close (otherwise, waveOutClose will not work on playing buffer)
waveOutReset(hWaveOut);
// Close the waveOut
waveOutClose(hWaveOut);
// Free allocated memory
LocalFree(soundBuffer);
}
/*
* GetDuration : return the music duration in seconds
*/
double __inline GetDuration() {
return durationInSecond;
}
/*
* GetPosition : return the current position from the sound playback (used from sync)
*/
double GetPosition() {
static MMTIME MMTime = { TIME_SAMPLES, 0};
waveOutGetPosition(hWaveOut, &MMTime, sizeof(MMTIME));
return ((double)MMTime.u.sample)/( 44100.0);
}
/*
* Play : play the previously opened mp3
*/
void Play() {
static WAVEHDR WaveHDR = { (LPSTR)soundBuffer, bufferLength };
// Define output format
static WAVEFORMATEX pcmFormat = {
WAVE_FORMAT_PCM, // WORD wFormatTag; /* format type */
2, // WORD nChannels; /* number of channels (i.e. mono, stereo...) */
44100, // DWORD nSamplesPerSec; /* sample rate */
4 * 44100, // DWORD nAvgBytesPerSec; /* for buffer estimation */
4, // WORD nBlockAlign; /* block size of data */
16, // WORD wBitsPerSample; /* number of bits per sample of mono data */
0, // WORD cbSize; /* the count in bytes of the size of */
};
mp3Assert( waveOutOpen( &hWaveOut, WAVE_MAPPER, &pcmFormat, NULL, 0, CALLBACK_NULL ) );
mp3Assert( waveOutPrepareHeader( hWaveOut, &WaveHDR, sizeof(WaveHDR) ) );
mp3Assert( waveOutWrite ( hWaveOut, &WaveHDR, sizeof(WaveHDR) ) );
}
};
#pragma function(memset,memcpy,memcmp)
The usage is then pretty simple :
MP3Player player;
// Open the mp3 from a file...
player.OpenFromFile("your.mp3");
// or From a memory location!
player.OpenFromMemory(ptrToMP3Song, bytesLength);
player.Play();
while (...) {
// Do here your synchro in the demo using
...
double playerPositionInSeconds = player.GetPosition()
...
}
player.Close();
And that's all! Hope someone will find this useful!
You can download a Visual Studio project using the MP3Player.h class.