Implementing FLAC playback through WASAPI

Started by
5 comments, last by AquaBytez 5 years, 6 months ago

As the title says, I'm a bit stumped on this. I'm not sure what to do for the write_callback of libFLAC++. I have implemented the rest of the callbacks correctly (I think). So that way, libFLAC can decode using an ifstream rather than a C-style FILE*.

Below is my implementation of the decoder and its callbacks.


#include <FLAC++/decoder.h>
#include <fstream>
#include <iostream>
  
class FLACStreamDecoder : public FLAC::Decoder::Stream {
private:
	std::ifstream& input;
	uint32_t sample_rate;
	uint32_t channels;
	uint32_t bits_per_sample;

public:
	~FLACStreamDecoder();
	// The FLAC decoder will take ownership of the ifstream.
	FLACStreamDecoder(std::ifstream& arg) : FLAC::Decoder::Stream(), input(arg) {}

	uint32_t getSampleRate() { return sample_rate; }
	uint32_t getChannels() { return channels; }
	uint32_t getBitsPerSample() { return bits_per_sample; }

	virtual void metadata_callback(const FLAC__StreamMetadata *);
	virtual ::FLAC__StreamDecoderReadStatus read_callback(FLAC__byte *, size_t *);
	virtual ::FLAC__StreamDecoderWriteStatus write_callback(const FLAC__Frame *, const FLAC__int32 * const *);
	virtual void error_callback(FLAC__StreamDecoderErrorStatus);
	virtual ::FLAC__StreamDecoderSeekStatus seek_callback(FLAC__uint64 absolute_byte_offset);
	virtual ::FLAC__StreamDecoderTellStatus tell_callback(FLAC__uint64 *absolute_byte_offset);
	virtual ::FLAC__StreamDecoderLengthStatus length_callback(FLAC__uint64 *stream_length);
	virtual bool eof_callback();
};

// Closes the input stream the decoder was reading from.
FLACStreamDecoder::~FLACStreamDecoder()
{
	input.close();
}

// Metadata callback: logs every invocation to stderr and, for a STREAMINFO
// block, caches sample rate, channel count and bits per sample.
void FLACStreamDecoder::metadata_callback(const FLAC__StreamMetadata * metadata) {
	std::cerr << "metadata callback called!" << std::endl;

	// Only STREAMINFO carries the parameters cached below.
	if (metadata->type != FLAC__METADATA_TYPE_STREAMINFO) {
		return;
	}

	std::cerr << "streaminfo found!" << std::endl;

	const auto& info = metadata->data.stream_info;
	sample_rate = info.sample_rate;
	channels = info.channels;
	bits_per_sample = info.bits_per_sample;
}

static_assert(sizeof(char) == sizeof(FLAC__byte), "invalid char size");

// Read callback: fills `buffer` with up to *nbytes bytes from the input stream
// and writes the number of bytes actually read back into *nbytes.
//
// Returns CONTINUE when at least one byte was read, END_OF_STREAM when nothing
// could be read because the stream is exhausted, and ABORT for a null/empty
// request or a stream error.
FLAC__StreamDecoderReadStatus FLACStreamDecoder::read_callback(FLAC__byte * buffer, size_t * nbytes) {
	if (!buffer || !nbytes || *nbytes == 0) {
		return FLAC__STREAM_DECODER_READ_STATUS_ABORT;
	}

	input.read(reinterpret_cast<char *>(buffer), static_cast<std::streamsize>(*nbytes));
	*nbytes = static_cast<size_t>(input.gcount());

	// istream::read sets failbit together with eofbit whenever fewer bytes than
	// requested are available, so fail() is not an error indicator here: using
	// it would turn the final, partial chunk of every file into an abort.
	if (input.bad()) {
		return FLAC__STREAM_DECODER_READ_STATUS_ABORT;
	}
	if (*nbytes > 0) {
		return FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
	}

	// Nothing was read: either the stream is exhausted or it was already in a
	// failed state before this call.
	return input.eof()
		? FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM
		: FLAC__STREAM_DECODER_READ_STATUS_ABORT;
}

// Seek callback: moves the read position to `absolute_byte_offset`, counted
// from the start of the file. Returns ERROR when the stream is not open or the
// seek itself fails.
::FLAC__StreamDecoderSeekStatus FLACStreamDecoder::seek_callback(FLAC__uint64 absolute_byte_offset) {
	if (!input.is_open()) {
		return FLAC__StreamDecoderSeekStatus::FLAC__STREAM_DECODER_SEEK_STATUS_ERROR;
	}

	// A previous read that hit end-of-file leaves eofbit/failbit set, and seekg
	// is a no-op while failbit is set, so drop the sticky state first.
	input.clear();
	input.seekg(static_cast<std::streamoff>(absolute_byte_offset), std::ios::beg);

	if (input.fail()) {
		return FLAC__StreamDecoderSeekStatus::FLAC__STREAM_DECODER_SEEK_STATUS_ERROR;
	}
	return FLAC__StreamDecoderSeekStatus::FLAC__STREAM_DECODER_SEEK_STATUS_OK;
}

// Tell callback: reports the current read position, in bytes from the start of
// the file, through *absolute_byte_offset. Returns ERROR when the output
// pointer is null, the stream is not open, or the position cannot be queried.
::FLAC__StreamDecoderTellStatus FLACStreamDecoder::tell_callback(FLAC__uint64 *absolute_byte_offset) {
	if (!absolute_byte_offset || !input.is_open()) {
		return FLAC__StreamDecoderTellStatus::FLAC__STREAM_DECODER_TELL_STATUS_ERROR;
	}

	// tellg() returns -1 while failbit is set, which is the normal state after
	// the read that reached end-of-file. Query with a clean state, then put the
	// previous flags back so eof_callback still sees the end-of-file condition.
	const std::ios_base::iostate saved_state = input.rdstate();
	input.clear();
	const std::streampos position = input.tellg();
	input.setstate(saved_state);

	if (position == std::streampos(-1)) {
		return FLAC__StreamDecoderTellStatus::FLAC__STREAM_DECODER_TELL_STATUS_ERROR;
	}

	*absolute_byte_offset = static_cast<FLAC__uint64>(static_cast<std::streamoff>(position));
	return FLAC__StreamDecoderTellStatus::FLAC__STREAM_DECODER_TELL_STATUS_OK;
}

// Length callback: reports the total size of the input file in bytes through
// *stream_length, leaving the read position where it was. Returns ERROR when
// the output pointer is null, the stream is not open, or a seek/tell fails.
::FLAC__StreamDecoderLengthStatus FLACStreamDecoder::length_callback(FLAC__uint64 *stream_length) {
	if (!stream_length || !input.is_open()) {
		return FLAC__StreamDecoderLengthStatus::FLAC__STREAM_DECODER_LENGTH_STATUS_ERROR;
	}

	// Work on a clean state (seekg/tellg do nothing while failbit is set) and
	// restore the previous flags afterwards.
	const std::ios_base::iostate saved_state = input.rdstate();
	input.clear();

	const std::streampos current_pos = input.tellg();
	if (current_pos == std::streampos(-1)) {
		input.setstate(saved_state);
		return FLAC__StreamDecoderLengthStatus::FLAC__STREAM_DECODER_LENGTH_STATUS_ERROR;
	}

	// Two-argument form: offset 0 relative to the end of the file. The
	// one-argument seekg(std::ios::end) would instead seek to the absolute
	// position given by the enum's integer value.
	input.seekg(0, std::ios::end);
	const std::streampos end_pos = input.tellg();
	input.seekg(current_pos);

	const bool succeeded = !input.fail() && end_pos != std::streampos(-1);
	input.setstate(saved_state);
	if (!succeeded) {
		return FLAC__StreamDecoderLengthStatus::FLAC__STREAM_DECODER_LENGTH_STATUS_ERROR;
	}

	*stream_length = static_cast<FLAC__uint64>(static_cast<std::streamoff>(end_pos));
	return FLAC__StreamDecoderLengthStatus::FLAC__STREAM_DECODER_LENGTH_STATUS_OK;
}

// EOF callback: reports whether the input stream has its end-of-file flag set.
bool FLACStreamDecoder::eof_callback()
{
	const bool at_end = input.eof();
	return at_end;
}

// Write callback: invoked once per decoded audio frame.
// `buffer` holds one array per channel (frame->header.channels of them), and
// each array contains frame->header.blocksize decoded samples.
// Currently a stub: the decoded samples are ignored and decoding continues.
FLAC__StreamDecoderWriteStatus FLACStreamDecoder::write_callback(const FLAC__Frame * frame, const FLAC__int32 * const * buffer) {
	static_cast<void>(frame);
	static_cast<void>(buffer);
	return FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE;
}

// Error callback: writes a short, human-readable name for the decoder error to
// stderr. Statuses not listed here are reported as "UNKNOWN ERROR".
void FLACStreamDecoder::error_callback(FLAC__StreamDecoderErrorStatus status) {
	const char* description = "UNKNOWN ERROR";

	if (status == FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER) {
		description = "BAD HEADER";
	}
	else if (status == FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC) {
		description = "LOST SYNC";
	}
	else if (status == FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH) {
		description = "FRAME CRC MISMATCH";
	}
	else if (status == FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM) {
		description = "UNPARSEABLE STREAM";
	}

	std::cerr << description << std::endl;
}

 

As you see, I have no idea what to do for the write_callback. Any help with regards to that would be appreciated. To be a bit more clear, the problem is that WASAPI has a frame size of numChannels * bitsPerSample bits, or numChannels * bitsPerSample / 8 bytes. I can't seem to figure out how to go from a FLAC frame to a WASAPI frame.

 

I'll also paste my WASAPI playback code below:
 


#pragma once
#include <iostream>
#define NOMINMAX
#include <Mmdeviceapi.h>
#include <Audioclient.h>
#include <fstream>
#include <algorithm>

// Owns the WASAPI objects needed to render audio on the default output device.
// Every member has a default initializer so the destructor can safely inspect
// it even when the constructor stopped part-way through its setup.
class WASAPIBackend
{
public:
	WASAPIBackend();
	~WASAPIBackend();
	// Not copyable: a copy would Release() the same COM interfaces and close
	// the same event handle a second time.
	WASAPIBackend(const WASAPIBackend&) = delete;
	WASAPIBackend& operator=(const WASAPIBackend&) = delete;
private:
	// Result of the most recent COM call.
	HRESULT hr = S_OK;
	IMMDeviceEnumerator* pDeviceEnumerator = nullptr;
	IMMDevice* pDevice = nullptr;
	IAudioClient3* pAudioClient = nullptr;
	IAudioRenderClient* pAudioRenderClient = nullptr;
	//WAVEFORMATEX* pMixFormat;
	WAVEFORMATEX mixFormat = {};
	// Engine periods, in frames, as filled in by GetSharedModeEnginePeriod.
	uint32_t defaultPeriodInFrames = 0;
	uint32_t fundamentalPeriodInFrames = 0;
	uint32_t minPeriodInFrames = 0;
	uint32_t maxPeriodInFrames = 0;
	// Event handed to IAudioClient::SetEventHandle; the playback loop waits on it.
	HANDLE audioSamplesReadyEvent = nullptr;
};

#include "WASAPIBackend.h"

// Releases the COM interface stored in *p, if there is one, and resets *p to
// nullptr so the same interface cannot be released twice.
// Both a null `p` and a null `*p` are no-ops; the original only checked `p`
// and therefore dereferenced a null interface pointer when the object had
// never been created.
constexpr void SafeRelease(IUnknown** p) {
	if (p && *p) {
		(*p)->Release();
		*p = nullptr;
	}
}

// Opens the default render endpoint as a shared-mode, event-driven WASAPI
// stream (16-bit stereo PCM at 44.1 kHz), pre-rolls one buffer of silence and
// then runs the playback loop.
// Failures are raised as std::runtime_error and caught/logged at the bottom of
// the constructor, so std::exception-derived errors never leave it; members
// that were not reached stay null.
WASAPIBackend::WASAPIBackend()
	: hr(S_OK), pDeviceEnumerator(nullptr), pDevice(nullptr), pAudioClient(nullptr), pAudioRenderClient(nullptr),
	mixFormat(),
	defaultPeriodInFrames(0), fundamentalPeriodInFrames(0), minPeriodInFrames(0), maxPeriodInFrames(0),
	audioSamplesReadyEvent(nullptr)
{
	try
	{
		hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
		if (FAILED(hr)) throw std::runtime_error("CoInitialize error");

		hr = CoCreateInstance(
			__uuidof(MMDeviceEnumerator),
			nullptr,
			CLSCTX_ALL,
			__uuidof(IMMDeviceEnumerator),
			reinterpret_cast<void**>(&pDeviceEnumerator));
		if (FAILED(hr)) throw std::runtime_error("CoCreateInstance error");

		hr = pDeviceEnumerator->GetDefaultAudioEndpoint(EDataFlow::eRender, ERole::eConsole, &pDevice);
		if (FAILED(hr)) throw std::runtime_error("IMMDeviceEnumerator.GetDefaultAudioEndpoint error");
		std::cout << "IMMDeviceEnumerator.GetDefaultAudioEndpoint()->OK" << std::endl;

		hr = pDevice->Activate(__uuidof(IAudioClient3), CLSCTX_ALL, nullptr, reinterpret_cast<void**>(&pAudioClient));
		if (FAILED(hr)) throw std::runtime_error("IMMDevice.Activate error");
		std::cout << "IMMDevice.Activate()->OK" << std::endl;

		// Requested stream format: 16-bit stereo PCM at 44.1 kHz. The derived
		// fields are computed from the primary ones so they cannot drift apart.
		WAVEFORMATEX wave_format = {};
		wave_format.wFormatTag = WAVE_FORMAT_PCM;
		wave_format.nChannels = 2;
		wave_format.nSamplesPerSec = 44100;
		wave_format.wBitsPerSample = 16;
		wave_format.nBlockAlign = static_cast<WORD>(wave_format.nChannels * wave_format.wBitsPerSample / 8);
		wave_format.nAvgBytesPerSec = wave_format.nSamplesPerSec * wave_format.nBlockAlign;
		//pAudioClient->GetMixFormat(reinterpret_cast<WAVEFORMATEX**>(&wave_format));

		// Queried once (the original issued the identical call twice).
		hr = pAudioClient->GetSharedModeEnginePeriod(&wave_format, &defaultPeriodInFrames, &fundamentalPeriodInFrames, &minPeriodInFrames, &maxPeriodInFrames);
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.GetDevicePeriod error");
		// These values are frame counts, not durations.
		std::cout << "default device period=" << defaultPeriodInFrames << "[frames]" << std::endl;
		std::cout << "minimum device period=" << minPeriodInFrames << "[frames]" << std::endl;

		hr = pAudioClient->InitializeSharedAudioStream(AUDCLNT_STREAMFLAGS_EVENTCALLBACK, minPeriodInFrames, &wave_format, nullptr);
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.Initialize error");
		std::cout << "IAudioClient.Initialize()->OK" << std::endl;

		// CreateEvent reports failure through a null handle, not through an
		// HRESULT, so the handle itself has to be checked.
		audioSamplesReadyEvent = CreateEvent(nullptr, false, false, nullptr);
		if (audioSamplesReadyEvent == nullptr) throw std::runtime_error("CreateEvent error");

		hr = pAudioClient->SetEventHandle(audioSamplesReadyEvent);
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.SetEventHandle error");

		UINT32 numBufferFrames = 0;
		hr = pAudioClient->GetBufferSize(&numBufferFrames);
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.GetBufferSize error");
		std::cout << "buffer frame size=" << numBufferFrames << "[frames]" << std::endl;

		hr = pAudioClient->GetService(__uuidof(IAudioRenderClient), reinterpret_cast<void**>(&pAudioRenderClient));
		// Switch back to decimal afterwards; std::hex is sticky and would
		// otherwise change every later number printed to std::cout.
		std::cout << std::hex << hr << std::dec << std::endl;
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.GetService error");

		BYTE *pData = nullptr;
		hr = pAudioRenderClient->GetBuffer(numBufferFrames, &pData);
		if (FAILED(hr)) throw std::runtime_error("IAudioRenderClient.GetBuffer error");

		const char* flac_filename = "audio/07 DaMonz - Choose Your Destiny (Super Smash Bros. Melee).flac";
		std::ifstream stream(flac_filename, std::ifstream::binary);
		FLACStreamDecoder streamer(stream);
		auto initStatus = streamer.init();
		if (FLAC__STREAM_DECODER_INIT_STATUS_OK != initStatus) {
			std::cerr << "ERROR INITIALIZING" << std::endl;
		}
		else {
			streamer.process_until_end_of_metadata();
			if (!streamer.process_single()) {
				std::cerr << "FAILED PROCESSING" << std::endl;
			}
			else {
				std::cerr << "SUCCEEDED PROCESSING" << std::endl;
			}
		}

		// Nothing was written into pData, so hand the pre-roll buffer back as
		// silence instead of letting the engine play its unspecified contents.
		hr = pAudioRenderClient->ReleaseBuffer(numBufferFrames, AUDCLNT_BUFFERFLAGS_SILENT);
		if (FAILED(hr)) throw std::runtime_error("IAudioRenderClient.ReleaseBuffer error");

		// NOTE(review): this is issued after the stream has been initialized;
		// confirm against the SetClientProperties documentation whether it can
		// still take effect at this point. It is kept best-effort (a failure is
		// logged, not fatal) because requesting offload earlier may make
		// initialization fail on devices without offload support.
		AudioClientProperties audioClientProp = {};
		audioClientProp.cbSize = sizeof(AudioClientProperties);
		audioClientProp.bIsOffload = true;
		audioClientProp.eCategory = AUDIO_STREAM_CATEGORY::AudioCategory_GameMedia;
		audioClientProp.Options = AUDCLNT_STREAMOPTIONS::AUDCLNT_STREAMOPTIONS_MATCH_FORMAT;
		if (FAILED(pAudioClient->SetClientProperties(&audioClientProp))) {
			std::cerr << "IAudioClient.SetClientProperties failed (ignored)" << std::endl;
		}

		hr = pAudioClient->Start();
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.Start error");
		std::cout << "IAudioClient.Start()->OK" << std::endl;

		//bool playing = (streamer.get_total_samples() > numBufferFrames);
		while (/*playing*/ true)
		{
			// Signalled by the audio engine whenever buffer space becomes free.
			WaitForSingleObject(audioSamplesReadyEvent, INFINITE);

			uint32_t numPaddingFrames = 0;
			hr = pAudioClient->GetCurrentPadding(&numPaddingFrames);
			if (FAILED(hr)) throw std::runtime_error("IAudioClient.GetCurrentPadding error");

			// Padding = frames still queued for playback; the rest is writable.
			uint32_t numAvailableFrames = numBufferFrames - numPaddingFrames;
			if (numAvailableFrames == 0) continue;

			hr = pAudioRenderClient->GetBuffer(numAvailableFrames, &pData);
			if (FAILED(hr)) throw std::runtime_error("IAudioRenderClient.GetBuffer error");

			// NOTE(review): this loop is known to be broken and is left as it
			// was. m_audioFrame is not declared anywhere in the visible code,
			// one whole FLAC block is decoded per WASAPI frame, and the copy
			// targets byte offset i (not i * nBlockAlign) with a length of
			// blocksize * channels bytes, which overruns the buffer. Fixing it
			// requires write_callback to hand decoded, interleaved samples to
			// this loop.
			for (size_t i = 0; i < numAvailableFrames; ++i) {
				streamer.process_single();
				memcpy(&pData[i], &m_audioFrame, streamer.get_blocksize() * streamer.getChannels());
			}

			hr = pAudioRenderClient->ReleaseBuffer((uint32_t)numAvailableFrames, 0);
			if (FAILED(hr)) throw std::runtime_error("IAudioRenderClient.ReleaseBuffer error");

			//playing = (streamer.get_total_samples() < numAvailableFrames);
		}

		// NOTE(review): unreachable while the loop above has no exit condition.
		do
		{
			// wait for buffer to be empty
			WaitForSingleObject(audioSamplesReadyEvent, INFINITE);

			uint32_t numPaddingFrames = 0;
			hr = pAudioClient->GetCurrentPadding(&numPaddingFrames);
			if (FAILED(hr)) throw std::runtime_error("IAudioClient.GetCurrentPadding error");

			if (numPaddingFrames == 0)
			{
				std::cout << "current buffer padding=0[frames]" << std::endl;
				break;
			}
		} while (true);

		hr = pAudioClient->Stop();
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.Stop error");
		std::cout << "IAudioClient.Stop()->OK" << std::endl;

	}
	catch (const std::exception& ex)
	{
		std::cout << "error:" << ex.what() << std::endl;
	}

}

// Closes the event handle, releases the COM interfaces in reverse order of
// acquisition and uninitializes COM.
// The constructor catches its own exceptions, so any of the interface
// pointers may still be null here; each one is checked before use.
WASAPIBackend::~WASAPIBackend()
{
	//CoTaskMemFree(pMixFormat);
	if (audioSamplesReadyEvent) {
		CloseHandle(audioSamplesReadyEvent);
		audioSamplesReadyEvent = nullptr;
	}
	if (pAudioRenderClient) {
		pAudioRenderClient->Release();
		pAudioRenderClient = nullptr;
	}
	if (pAudioClient) {
		pAudioClient->Release();
		pAudioClient = nullptr;
	}
	if (pDevice) {
		pDevice->Release();
		pDevice = nullptr;
	}
	if (pDeviceEnumerator) {
		pDeviceEnumerator->Release();
		pDeviceEnumerator = nullptr;
	}
	// NOTE(review): this also runs when CoInitializeEx failed in the
	// constructor; confirm whether that case needs to be tracked separately.
	CoUninitialize();
}

 

The playback loop is most definitely broken, but I can't fix it if the decoder isn't working. Please note that I'm doing this for learning purposes, to get a better understanding of how libraries like SDL_Mixer and libsndfile work at a more fundamental level.

Advertisement

Do you have a basic knowledge of the hardware- and OS-specific software parts of audio processing?

You are asking two different things here and I try my best to answer both. Audio on the hardware level is just a wave that is presented as amplitude signal of -5v to 5v so a speaker can "pulse" and so generate audio waves the human ears could translate into sound and voice.

One layer above, those waves are just buffers of amplitude data in the form of raw bytes that are passed to the audio stream and pushed to the speakers via the hardware bus. Accessing these buffers is the lowest level at which we can process sound.

The OS does another thing too: since only one audio stream can be sent to each speaker at a time, the OS mixes sounds and voices together, so that music can keep playing while, for example, an OS notification sound plays, without stopping the music. This is done by a software audio mixer built into the driver and/or the OS.

Audio libraries now provide their own capability to mix different audio streams together under certain circumstances, using rules that you or the author of the library have provided. When you call the "PlaySound" function, the whole buffer of that audio resource is queued, from its current position, into the library's audio mixer queue, which provides the data for each "audio frame". Sounds are mixed together according to their volume and, in the case of 3D sound, also according to where the audio listener is relative to the audio source.

A funny fact, silence is also an audio stream but on the same level so no amplitudes are passed here.

The frame mismatch you have here might depend on the frequency you initialized your audio source with. Every audio file carries a desired frequency at which it is intended to play "best". You can initialize your hardware devices with a number of supported channels (up to 7.1 sound) and a frequency (up to 96 kHz, if I remember my experiments correctly). You now either have to sample your FLAC frame down to match the WASAPI frame, or you need to initialize the WASAPI frame with a higher frequency to get more bits into it; the choice is yours.

A side note: normally audio runs in its own thread, or even several threads, so that the next audio frame can be mixed in parallel with playing the current one. You should use a continuous buffer and ensure that there is always enough data present before the next mixing step ends, or you'll get stuttering in your playback.

The test audio file I have is a 44100Hz 16bit FLAC (according to its metadata). Which is why I initialize the WAVEFORMATEX as 16bit, 44100Hz. I know that at a later point in time I may need to resample the audio if the source file and the audio engine formats don't match. I will need to test this however, as I have seen different information regarding this. In exclusive mode, it appears that you absolutely have to match formats, but it seems that in shared mode, WASAPI *might* match the formats for you.

Now, If I understood the rest of the post... I need a circular buffer for the audio that contains up to numRequestedFrames. The thing is, a WASAPI audio frame is of size numChannels * bitsPerSample / 8. But from everything I have seen, it seems that a FLAC audio frame is of size frame->header.blocksize (number of samples in the frame, in my case 1000, and it seems to be constant) * frame->header.channels (in my case, this is 2). I know that the buffer I'm given in the write callback is a pointer to pointer of channels, that each contains blocksize audio frames. So, for example if the input file contains 2 channels, buffer[0] would be 1000 samples for the left channel. buffer[1] would be another 1000 samples for the right channel. Which makes a total of 2000 samples for a stereo file. 

 

And that's the problem. How do I fit 2000 samples into an audio frame that is supposedly only 4bytes (32 bits) in size?

 

Edit: I plan on making this run on its own thread in the future, but I thought it would be fine for now, since I'm maintaining a separate project with just this code until it works.

 

Thanks for the help so far, I appreciate it. ?

I think I almost got it, now I get a very annoying buzzing sound (good sign, at least I'm getting output of some sort).

 


// Global hand-off between decoder and playback code: write_callback appends one
// pointer to an interleaved sample block per decoded FLAC frame, and the
// playback loop reads from and clears this vector.
std::vector<FLAC__int32*> audioFrames;

class FLACStreamDecoder : public FLAC::Decoder::Stream {
private:
	std::ifstream& input;

public:
	~FLACStreamDecoder();
	// The FLAC decoder will take ownership of the ifstream.
	FLACStreamDecoder(std::ifstream& arg) : FLAC::Decoder::Stream(), input(arg) {}

	virtual void metadata_callback(const FLAC__StreamMetadata *);
	virtual ::FLAC__StreamDecoderReadStatus read_callback(FLAC__byte *, size_t *);
	virtual ::FLAC__StreamDecoderWriteStatus write_callback(const FLAC__Frame *, const FLAC__int32 * const *);
	virtual void error_callback(FLAC__StreamDecoderErrorStatus);
	virtual ::FLAC__StreamDecoderSeekStatus seek_callback(FLAC__uint64 absolute_byte_offset);
	virtual ::FLAC__StreamDecoderTellStatus tell_callback(FLAC__uint64 *absolute_byte_offset);
	virtual ::FLAC__StreamDecoderLengthStatus length_callback(FLAC__uint64 *stream_length);
	virtual bool eof_callback();
};

// Closes the input stream the decoder was reading from.
FLACStreamDecoder::~FLACStreamDecoder()
{
	input.close();
}

// Metadata callback: for a STREAMINFO block, prints the stream parameters
// (sample rate, bits per sample, channel count, total samples) to stderr.
// Other metadata block types are ignored.
void FLACStreamDecoder::metadata_callback(const FLAC__StreamMetadata * metadata) {
	if (metadata->type != FLAC__METADATA_TYPE_STREAMINFO) {
		return;
	}

	const auto& info = metadata->data.stream_info;
	std::cerr << "Sample rate: " << info.sample_rate << std::endl;
	// This field is the sample bit depth, not a bit rate, so label it as such.
	std::cerr << "Bits per sample: " << info.bits_per_sample << std::endl;
	std::cerr << "Channels: " << info.channels << std::endl;
	std::cerr << "Total samples: " << info.total_samples << std::endl;
}

static_assert(sizeof(char) == sizeof(FLAC__byte), "invalid char size");

// Read callback: fills `buffer` with up to *nbytes bytes from the input stream
// and writes the number of bytes actually read back into *nbytes.
//
// Returns CONTINUE when at least one byte was read, END_OF_STREAM when nothing
// could be read because the stream is exhausted, and ABORT for a null/empty
// request or a stream error.
FLAC__StreamDecoderReadStatus FLACStreamDecoder::read_callback(FLAC__byte * buffer, size_t * nbytes) {
	if (!buffer || !nbytes || *nbytes == 0) {
		return FLAC__STREAM_DECODER_READ_STATUS_ABORT;
	}

	input.read(reinterpret_cast<char *>(buffer), static_cast<std::streamsize>(*nbytes));
	*nbytes = static_cast<size_t>(input.gcount());

	// istream::read sets failbit together with eofbit whenever fewer bytes than
	// requested are available, so fail() is not an error indicator here: using
	// it would turn the final, partial chunk of every file into an abort.
	if (input.bad()) {
		return FLAC__STREAM_DECODER_READ_STATUS_ABORT;
	}
	if (*nbytes > 0) {
		return FLAC__STREAM_DECODER_READ_STATUS_CONTINUE;
	}

	// Nothing was read: either the stream is exhausted or it was already in a
	// failed state before this call.
	return input.eof()
		? FLAC__STREAM_DECODER_READ_STATUS_END_OF_STREAM
		: FLAC__STREAM_DECODER_READ_STATUS_ABORT;
}

// Seek callback: moves the read position to `absolute_byte_offset`, counted
// from the start of the file. Returns ERROR when the stream is not open or the
// seek itself fails.
::FLAC__StreamDecoderSeekStatus FLACStreamDecoder::seek_callback(FLAC__uint64 absolute_byte_offset) {
	if (!input.is_open()) {
		return FLAC__StreamDecoderSeekStatus::FLAC__STREAM_DECODER_SEEK_STATUS_ERROR;
	}

	// A previous read that hit end-of-file leaves eofbit/failbit set, and seekg
	// is a no-op while failbit is set, so drop the sticky state first.
	input.clear();
	input.seekg(static_cast<std::streamoff>(absolute_byte_offset), std::ios::beg);

	if (input.fail()) {
		return FLAC__StreamDecoderSeekStatus::FLAC__STREAM_DECODER_SEEK_STATUS_ERROR;
	}
	return FLAC__StreamDecoderSeekStatus::FLAC__STREAM_DECODER_SEEK_STATUS_OK;
}

// Tell callback: reports the current read position, in bytes from the start of
// the file, through *absolute_byte_offset. Returns ERROR when the output
// pointer is null, the stream is not open, or the position cannot be queried.
::FLAC__StreamDecoderTellStatus FLACStreamDecoder::tell_callback(FLAC__uint64 *absolute_byte_offset) {
	if (!absolute_byte_offset || !input.is_open()) {
		return FLAC__StreamDecoderTellStatus::FLAC__STREAM_DECODER_TELL_STATUS_ERROR;
	}

	// tellg() returns -1 while failbit is set, which is the normal state after
	// the read that reached end-of-file. Query with a clean state, then put the
	// previous flags back so eof_callback still sees the end-of-file condition.
	const std::ios_base::iostate saved_state = input.rdstate();
	input.clear();
	const std::streampos position = input.tellg();
	input.setstate(saved_state);

	if (position == std::streampos(-1)) {
		return FLAC__StreamDecoderTellStatus::FLAC__STREAM_DECODER_TELL_STATUS_ERROR;
	}

	*absolute_byte_offset = static_cast<FLAC__uint64>(static_cast<std::streamoff>(position));
	return FLAC__StreamDecoderTellStatus::FLAC__STREAM_DECODER_TELL_STATUS_OK;
}

// Length callback: reports the total size of the input file in bytes through
// *stream_length, leaving the read position where it was. Returns ERROR when
// the output pointer is null, the stream is not open, or a seek/tell fails.
::FLAC__StreamDecoderLengthStatus FLACStreamDecoder::length_callback(FLAC__uint64 *stream_length) {
	if (!stream_length || !input.is_open()) {
		return FLAC__StreamDecoderLengthStatus::FLAC__STREAM_DECODER_LENGTH_STATUS_ERROR;
	}

	// Work on a clean state (seekg/tellg do nothing while failbit is set) and
	// restore the previous flags afterwards.
	const std::ios_base::iostate saved_state = input.rdstate();
	input.clear();

	const std::streampos current_pos = input.tellg();
	if (current_pos == std::streampos(-1)) {
		input.setstate(saved_state);
		return FLAC__StreamDecoderLengthStatus::FLAC__STREAM_DECODER_LENGTH_STATUS_ERROR;
	}

	// Two-argument form: offset 0 relative to the end of the file. The
	// one-argument seekg(std::ios::end) would instead seek to the absolute
	// position given by the enum's integer value.
	input.seekg(0, std::ios::end);
	const std::streampos end_pos = input.tellg();
	input.seekg(current_pos);

	const bool succeeded = !input.fail() && end_pos != std::streampos(-1);
	input.setstate(saved_state);
	if (!succeeded) {
		return FLAC__StreamDecoderLengthStatus::FLAC__STREAM_DECODER_LENGTH_STATUS_ERROR;
	}

	*stream_length = static_cast<FLAC__uint64>(static_cast<std::streamoff>(end_pos));
	return FLAC__StreamDecoderLengthStatus::FLAC__STREAM_DECODER_LENGTH_STATUS_OK;
}

// EOF callback: reports whether the input stream has its end-of-file flag set.
bool FLACStreamDecoder::eof_callback()
{
	const bool at_end = input.eof();
	return at_end;
}

// Write callback: invoked once per decoded FLAC frame. `buffer` holds one array
// per channel, each with frame->header.blocksize samples. The samples are
// interleaved (ch0, ch1, ..., ch0, ch1, ...) into a newly allocated block and
// the block's pointer is appended to the global audioFrames vector.
//
// Ownership of the block passes to audioFrames: whoever removes a pointer from
// that vector is responsible for delete[]-ing it. The block must not be freed
// here; the original did so right after push_back, which left a dangling
// pointer in the vector for the playback code to read.
FLAC__StreamDecoderWriteStatus FLACStreamDecoder::write_callback(const FLAC__Frame * frame, const FLAC__int32 * const * buffer) {
	const unsigned int channels = frame->header.channels;    // channels in this frame
	const unsigned int blocksize = frame->header.blocksize;  // samples per channel
	const size_t total_samples = static_cast<size_t>(channels) * blocksize;

	// Every element is overwritten below, so no zero-fill is needed.
	FLAC__int32 * const interleaved = new FLAC__int32[total_samples];

	size_t write_index = 0;
	for (size_t sample_index = 0; sample_index < blocksize; ++sample_index)
	{
		for (size_t channel_index = 0; channel_index < channels; ++channel_index)
		{
			interleaved[write_index++] = buffer[channel_index][sample_index];
		}
	}

	audioFrames.push_back(interleaved);

	return FLAC__StreamDecoderWriteStatus::FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE;
}

// Error callback: writes a short, human-readable name for the decoder error to
// stderr. Statuses not listed here are reported as "UNKNOWN ERROR".
void FLACStreamDecoder::error_callback(FLAC__StreamDecoderErrorStatus status) {
	const char* description = "UNKNOWN ERROR";

	if (status == FLAC__StreamDecoderErrorStatus::FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER) {
		description = "BAD HEADER";
	}
	else if (status == FLAC__StreamDecoderErrorStatus::FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC) {
		description = "LOST SYNC";
	}
	else if (status == FLAC__StreamDecoderErrorStatus::FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH) {
		description = "FRAME CRC MISMATCH";
	}
	else if (status == FLAC__StreamDecoderErrorStatus::FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM) {
		description = "UNPARSEABLE STREAM";
	}

	std::cerr << description << std::endl;
}

And, as before, my playback code:


// Releases the COM interface stored in *p, if there is one, and resets *p to
// nullptr so the same interface cannot be released twice.
// Both a null `p` and a null `*p` are no-ops; the original only checked `p`
// and therefore dereferenced a null interface pointer when the object had
// never been created.
constexpr void SafeRelease(IUnknown** p) {
	if (p && *p) {
		(*p)->Release();
		*p = nullptr;
	}
}

// Opens the default render endpoint as a shared-mode, event-driven WASAPI
// stream (16-bit stereo PCM at 44.1 kHz), pre-rolls one buffer of silence and
// then runs the playback loop.
// Failures are raised as std::runtime_error and caught/logged at the bottom of
// the constructor, so std::exception-derived errors never leave it; members
// that were not reached stay null.
WASAPIBackend::WASAPIBackend()
	: hr(S_OK), pDeviceEnumerator(nullptr), pDevice(nullptr), pAudioClient(nullptr), pAudioRenderClient(nullptr),
	mixFormat(),
	defaultPeriodInFrames(0), fundamentalPeriodInFrames(0), minPeriodInFrames(0), maxPeriodInFrames(0),
	audioSamplesReadyEvent(nullptr)
{
	try
	{
		hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED);
		if (FAILED(hr))
		{
			throw std::runtime_error("CoInitialize error");
		}

		hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), reinterpret_cast<void**>(&pDeviceEnumerator));
		if (FAILED(hr)) {
			throw std::runtime_error("CoCreateInstance error");
		}

		hr = pDeviceEnumerator->GetDefaultAudioEndpoint(EDataFlow::eRender, ERole::eConsole, &pDevice);
		if (FAILED(hr)) throw std::runtime_error("IMMDeviceEnumerator.GetDefaultAudioEndpoint error");
		std::cout << "IMMDeviceEnumerator.GetDefaultAudioEndpoint()->OK" << std::endl;

		hr = pDevice->Activate(__uuidof(IAudioClient3), CLSCTX_ALL, nullptr, reinterpret_cast<void**>(&pAudioClient));
		if (FAILED(hr)) throw std::runtime_error("IMMDevice.Activate error");
		std::cout << "IMMDevice.Activate()->OK" << std::endl;

		// Requested stream format: 16-bit stereo PCM at 44.1 kHz. The derived
		// fields are computed from the primary ones so they cannot drift apart.
		WAVEFORMATEX wave_format = {};
		wave_format.wFormatTag = WAVE_FORMAT_PCM;
		wave_format.nChannels = 2;
		wave_format.nSamplesPerSec = 44100;
		wave_format.wBitsPerSample = 16;
		wave_format.nBlockAlign = static_cast<WORD>(wave_format.nChannels * wave_format.wBitsPerSample / 8);
		wave_format.nAvgBytesPerSec = wave_format.nSamplesPerSec * wave_format.nBlockAlign;
		//pAudioClient->GetMixFormat(reinterpret_cast<WAVEFORMATEX**>(&wave_format));

		// Queried once (the original issued the identical call twice).
		hr = pAudioClient->GetSharedModeEnginePeriod(&wave_format, &defaultPeriodInFrames, &fundamentalPeriodInFrames, &minPeriodInFrames, &maxPeriodInFrames);
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.GetDevicePeriod error");
		// These values are frame counts, not durations.
		std::cout << "default device period=" << defaultPeriodInFrames << "[frames]" << std::endl;
		std::cout << "minimum device period=" << minPeriodInFrames << "[frames]" << std::endl;

		hr = pAudioClient->InitializeSharedAudioStream(AUDCLNT_STREAMFLAGS_EVENTCALLBACK, minPeriodInFrames, &wave_format, nullptr);
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.Initialize error");
		std::cout << "IAudioClient.Initialize()->OK" << std::endl;

		// CreateEvent reports failure through a null handle, not through an
		// HRESULT, so the handle itself has to be checked.
		audioSamplesReadyEvent = CreateEvent(nullptr, false, false, nullptr);
		if (audioSamplesReadyEvent == nullptr) throw std::runtime_error("CreateEvent error");

		hr = pAudioClient->SetEventHandle(audioSamplesReadyEvent);
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.SetEventHandle error");

		UINT32 numBufferFrames = 0;
		hr = pAudioClient->GetBufferSize(&numBufferFrames);
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.GetBufferSize error");
		std::cout << "buffer frame size=" << numBufferFrames << "[frames]" << std::endl;

		hr = pAudioClient->GetService(__uuidof(IAudioRenderClient), reinterpret_cast<void**>(&pAudioRenderClient));
		// Switch back to decimal afterwards; std::hex is sticky and would
		// otherwise change every later number printed to std::cout.
		std::cout << std::hex << hr << std::dec << std::endl;
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.GetService error");

		BYTE *pData = nullptr;
		hr = pAudioRenderClient->GetBuffer(numBufferFrames, &pData);
		if (FAILED(hr)) throw std::runtime_error("IAudioRenderClient.GetBuffer error");

		const char* flac_filename = "audio/07 DaMonz - Choose Your Destiny (Super Smash Bros. Melee).flac";
		std::ifstream stream(flac_filename, std::ifstream::binary);
		FLACStreamDecoder streamer(stream);
		auto initStatus = streamer.init();
		if (FLAC__STREAM_DECODER_INIT_STATUS_OK != initStatus) {
			std::cerr << "ERROR INITIALIZING" << std::endl;
		}
		else {
			if (!streamer.process_until_end_of_metadata()) {
				std::cerr << "FAILED PROCESSING" << std::endl;
			}
			else {
				std::cerr << "SUCCEEDED PROCESSING" << std::endl;
			}
		}

		// Nothing was written into pData, so hand the pre-roll buffer back as
		// silence instead of letting the engine play its unspecified contents.
		hr = pAudioRenderClient->ReleaseBuffer(numBufferFrames, AUDCLNT_BUFFERFLAGS_SILENT);
		if (FAILED(hr)) throw std::runtime_error("IAudioRenderClient.ReleaseBuffer error");

		// NOTE(review): this is issued after the stream has been initialized;
		// confirm against the SetClientProperties documentation whether it can
		// still take effect at this point. It is kept best-effort (a failure is
		// logged, not fatal) because requesting offload earlier may make
		// initialization fail on devices without offload support.
		AudioClientProperties audioClientProp = {};
		audioClientProp.cbSize = sizeof(AudioClientProperties);
		audioClientProp.bIsOffload = true;
		audioClientProp.eCategory = AUDIO_STREAM_CATEGORY::AudioCategory_GameMedia;
		audioClientProp.Options = AUDCLNT_STREAMOPTIONS::AUDCLNT_STREAMOPTIONS_MATCH_FORMAT;
		if (FAILED(pAudioClient->SetClientProperties(&audioClientProp))) {
			std::cerr << "IAudioClient.SetClientProperties failed (ignored)" << std::endl;
		}

		hr = pAudioClient->Start();
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.Start error");
		std::cout << "IAudioClient.Start()->OK" << std::endl;

		FLAC__uint64 pos = 0;
		streamer.get_decode_position(&pos);

		bool playing = true;
		while (playing)
		{
			// Signalled by the audio engine whenever buffer space becomes free.
			WaitForSingleObject(audioSamplesReadyEvent, INFINITE);

			uint32_t numPaddingFrames = 0;
			hr = pAudioClient->GetCurrentPadding(&numPaddingFrames);
			if (FAILED(hr)) throw std::runtime_error("IAudioClient.GetCurrentPadding error");

			// Padding = frames still queued for playback; the rest is writable.
			uint32_t numAvailableFrames = numBufferFrames - numPaddingFrames;
			if (numAvailableFrames == 0) {
				continue;
			}

			hr = pAudioRenderClient->GetBuffer(numAvailableFrames, &pData);
			if (FAILED(hr)) {
				throw std::runtime_error("IAudioRenderClient.GetBuffer error");
			}

			// TODO: Implement a proper "buffer"
			if (audioFrames.size() > numBufferFrames) {
				audioFrames.clear();
			}

			// NOTE(review): this section is known to be broken and is left as
			// it was. It decodes one whole FLAC block per WASAPI frame, stores
			// a (truncated) pointer value into each output byte instead of
			// sample data, and then re-points the local pData variable, which
			// does not change what is in the WASAPI buffer. A working version
			// must copy numAvailableFrames * nBlockAlign bytes of decoded,
			// interleaved 16-bit samples into pData, which requires changes to
			// audioFrames and write_callback as well.
			for (size_t i = 0; i < numAvailableFrames; ++i) {
				streamer.process_single();
				pData[i] = reinterpret_cast<BYTE>(audioFrames.data());
			}

			pData = reinterpret_cast<BYTE*>(*audioFrames.data());

			hr = pAudioRenderClient->ReleaseBuffer(numAvailableFrames, 0);
			if (FAILED(hr)) throw std::runtime_error("IAudioRenderClient.ReleaseBuffer error");

			// NOTE(review): `pos` comes from get_decode_position while the
			// other operand is a sample count; confirm that the two are in the
			// same unit before relying on this as the stop condition.
			streamer.get_decode_position(&pos);
			playing = (streamer.get_total_samples() - pos) > numAvailableFrames;
		}

		do
		{
			// wait for buffer to be empty
			WaitForSingleObject(audioSamplesReadyEvent, INFINITE);

			uint32_t numPaddingFrames = 0;
			hr = pAudioClient->GetCurrentPadding(&numPaddingFrames);
			if (FAILED(hr)) throw std::runtime_error("IAudioClient.GetCurrentPadding error");

			if (numPaddingFrames == 0)
			{
				std::cout << "current buffer padding=0[frames]" << std::endl;
				break;
			}
		} while (true);

		hr = pAudioClient->Stop();
		if (FAILED(hr)) throw std::runtime_error("IAudioClient.Stop error");
		std::cout << "IAudioClient.Stop()->OK" << std::endl;

	}
	catch (const std::exception& ex)
	{
		std::cout << "error:" << ex.what() << std::endl;
	}

}

// Closes the event handle, releases the COM interfaces in reverse order of
// acquisition and uninitializes COM.
// The constructor catches its own exceptions, so any of the interface
// pointers may still be null here; each one is checked before use.
WASAPIBackend::~WASAPIBackend()
{
	//CoTaskMemFree(pMixFormat);
	if (audioSamplesReadyEvent) {
		CloseHandle(audioSamplesReadyEvent);
		audioSamplesReadyEvent = nullptr;
	}
	if (pAudioRenderClient) {
		pAudioRenderClient->Release();
		pAudioRenderClient = nullptr;
	}
	if (pAudioClient) {
		pAudioClient->Release();
		pAudioClient = nullptr;
	}
	if (pDevice) {
		pDevice->Release();
		pDevice = nullptr;
	}
	if (pDeviceEnumerator) {
		pDeviceEnumerator->Release();
		pDeviceEnumerator = nullptr;
	}
	// NOTE(review): this also runs when CoInitializeEx failed in the
	// constructor; confirm whether that case needs to be tracked separately.
	CoUninitialize();
}

 

I just need another nudge in the right direction. If anybody could help me figure out what's wrong this time, I would greatly appreciate it.

Have you read the docs properly?

Again, you play one sample at a time per channel, so the FLAC block size should be a value that fits fully into the WASAPI audio buffer. They mention something like 4608 bytes as the block size for a 48 kHz playback rate.

I don't know whether you understand audio hardware correctly: you shouldn't provide a whole frame of whatever format; instead, you provide a buffer of arbitrary size so that you can mix your samples and provide data when hardware playback reaches the point where it needs them. So all you have to provide is a buffer that is big enough to last a few hundred ms.

I played around with the old WAVEAPI some time ago and provided a buffer of a certain size, which was around 2 seconds of audio that I grabbed from my mic. WAVEAPI then took that buffer into playback and I got the whole recording out of my speakers.

As I understand it, those APIs aren't very different in use, so you provide a buffer for each channel that has the same size as every other channel's buffer and give it "at least" a few ms of space, so the formula should be playbackRate * sampleRate * time, in bytes and milliseconds. Then start streaming your audio into that buffer until it is filled, and await the end of playback to start with the next bytes of data.

In the best case you have several buffers, so that one can be played back while another is being filled with the next data, and you swap them immediately when WASAPI returns from playback. This is the double- or even triple-buffering term you know from graphics coding.

I got back into this after about a week, and got it working. What remains should be simple. Re-sampling and making the decoder channel independent.

 

The initialization code didn't change, and I got the chance to refactor some of the code (this code is extremely WIP still)

 


void WASAPIBackend::Play() {
	try
	{
		UINT32 numBufferFrames = 0;
		hr = pAudioClient->GetBufferSize(&numBufferFrames);

		if (FAILED(hr)) { 
			throw std::runtime_error("IAudioClient.GetBufferSize error: " + std::to_string(hr));
		}

		std::cout << "buffer frame size = " << numBufferFrames << "[frames]" << std::endl;

		BYTE *pData = nullptr;
		hr = pAudioRenderClient->GetBuffer(numBufferFrames, &pData);
		if (FAILED(hr)) {
			throw std::runtime_error("IAudioRenderClient.GetBuffer error: " + std::to_string(hr));
		}

		std::filesystem::path p = "audio/07 DaMonz - Choose Your Destiny (Super Smash Bros. Melee).flac";
		std::ifstream stream(p, std::ifstream::binary);
		FLACStreamDecoder streamer(stream);

		auto initStatus = streamer.init();

		if (FLAC__StreamDecoderInitStatus::FLAC__STREAM_DECODER_INIT_STATUS_OK != initStatus) {
			std::cerr << "ERROR INITIALIZING" << std::endl;
		}

		else {
			if (!streamer.process_until_end_of_metadata()) {
				std::cerr << "FAILED PROCESSING" << std::endl;
			}
			else {
				std::cerr << "SUCCEEDED PROCESSING" << std::endl;
			}
		}

		hr = pAudioRenderClient->ReleaseBuffer(numBufferFrames, AUDCLNT_BUFFERFLAGS_SILENT);

		if (FAILED(hr)) { 
			throw std::runtime_error("IAudioRenderClient.ReleaseBuffer error: " + std::to_string(hr));
		}

		hr = pAudioClient->Start();
		if (FAILED(hr)) {
			throw std::runtime_error("IAudioClient.Start error: " + std::to_string(hr));
		}

		std::cout << "IAudioClient.Start()->OK" << std::endl;

		unsigned int read_pos = 0;

		bool playing = true;
		while (playing)
		{
			WaitForSingleObject(audioSamplesReadyEvent, INFINITE);

			uint32_t numPaddingFrames = 0;
			hr = pAudioClient->GetCurrentPadding(&numPaddingFrames);
			if (FAILED(hr)) { 
				throw std::runtime_error("IAudioClient.GetCurrentPadding error: " + std::to_string(hr));
			}

			uint32_t numAvailableFrames = numBufferFrames - numPaddingFrames;
			if (numAvailableFrames == 0) {
				continue;
			}

			hr = pAudioRenderClient->GetBuffer(numAvailableFrames, &pData);
			if (FAILED(hr)) {
				throw std::runtime_error("IAudioRenderClient.GetBuffer error: " + std::to_string(hr));
			}

			read_pos = streamer.loadBuffer((int32_t*)pData, numAvailableFrames);

			//std::copy(pData, pData + numAvailableFrames, std::ostream_iterator<int32_t>(std::cout, " "));

			hr = pAudioRenderClient->ReleaseBuffer(numAvailableFrames, 0);
			if (FAILED(hr)) {
				throw std::runtime_error("IAudioRenderClient.ReleaseBuffer error: " + std::to_string(hr));
			}

			playing = (read_pos != numAvailableFrames);
		}

		do
		{
			// wait for buffer to be empty
			WaitForSingleObject(audioSamplesReadyEvent, INFINITE);

			uint32_t numPaddingFrames = 0;
			hr = pAudioClient->GetCurrentPadding(&numPaddingFrames);
			if (FAILED(hr)) {
				throw std::runtime_error("IAudioClient.GetCurrentPadding error" + std::to_string(hr));
			}

			if (numPaddingFrames == 0)
			{
				std::cout << "current buffer padding = 0[frames]" << std::endl;
				break;
			}
		} while (true);

		hr = pAudioClient->Stop();
		if (FAILED(hr)) {
			throw std::runtime_error("IAudioClient.Stop error: " + std::to_string(hr));
		}

		std::cout << "IAudioClient.Stop() -> OK" << std::endl;

	}
	catch (std::exception& ex)
	{
		std::cout << std::hex << "Error: " << ex.what() << std::endl;
	}
}

And the parts that changed on the FLAC decoder:

 


class FLACStreamDecoder : public FLAC::Decoder::Stream {
public:
	~FLACStreamDecoder();
	// The FLAC decoder will take ownership of the ifstream.
	FLACStreamDecoder(std::ifstream& arg) : FLAC::Decoder::Stream(), input(arg), priv_frame(nullptr), pos(0), len(0), remaining(0), wbuffer(nullptr), ibuffer(), bufferpos(0) { }

	size_t loadBuffer(int *ptr, size_t bufferLength);

private:
	std::ifstream& input;
	virtual void metadata_callback(const FLAC__StreamMetadata *);
	virtual ::FLAC__StreamDecoderReadStatus read_callback(FLAC__byte *, size_t *);
	virtual ::FLAC__StreamDecoderWriteStatus write_callback(const FLAC__Frame *, const FLAC__int32 * const *);
	virtual void error_callback(FLAC__StreamDecoderErrorStatus);
	virtual ::FLAC__StreamDecoderSeekStatus seek_callback(FLAC__uint64);
	virtual ::FLAC__StreamDecoderTellStatus tell_callback(FLAC__uint64 *);
	virtual ::FLAC__StreamDecoderLengthStatus length_callback(FLAC__uint64 *);
	virtual bool eof_callback();

	const int32_t * const * wbuffer; // libFLAC buffer
	const FLAC__Frame * priv_frame; // We only work with these ^ two.

	int32_t* buf = nullptr; // Buffer passed onto this function.
	unsigned int pos, len, remaining;
	
	int32_t * ibuffer[FLAC__MAX_CHANNELS]; // Intermediary buffer

	unsigned bufferpos;

	void flacReadLoop();

	unsigned int bufferCopy();
}


// libFLAC++ write callback, invoked with one decoded frame.
// Records the frame and its per-channel sample arrays, rewinds the in-frame
// read cursor, and immediately forwards the samples through bufferCopy().
// Always tells the decoder to continue.
FLAC__StreamDecoderWriteStatus FLACStreamDecoder::write_callback(const FLAC__Frame * frame, const FLAC__int32 * const * buffer) {
	// Remember where the decoded data lives.
	wbuffer = buffer;
	priv_frame = frame;

	// A new frame starts at its first sample.
	bufferpos = 0;

	// The number of samples copied is tracked through `pos`; the return value
	// is not needed here.
	static_cast<void>(bufferCopy());

	return FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE;
}

// Fills `ptr` with decoded, interleaved samples.
//
// `bufferLength` is doubled to obtain the number of samples to produce
// (stereo is hard-coded). Decoding proceeds in passes through flacReadLoop()
// until the request is satisfied or a pass produces nothing. Returns the
// number of samples written to `ptr`.
size_t FLACStreamDecoder::loadBuffer(int *ptr, size_t bufferLength)
{
	// TODO: Make this channel independent.
	const size_t wanted = bufferLength * 2;
	size_t written = 0;

	for (; written < wanted; written += pos)
	{
		const size_t missing = wanted - written;

		// Each pass writes at the current end of the filled region.
		buf = ptr + written;

		// Cap the size of one pass: the original author observed the decoder
		// crashing when the read length exceeds (2^24) - 1.
		len = (missing > 0x1000000) ? 0x1000000 : static_cast<unsigned int>(missing);

		flacReadLoop();

		// A pass that produced nothing means no more data is coming.
		if (pos == 0) {
			break;
		}
	}

	return written;
}

void FLACStreamDecoder::flacReadLoop()
{
	pos = 0;
	remaining = len;

	FLAC::Decoder::Stream::State state = get_state();
	if (state > FLAC__STREAM_DECODER_END_OF_STREAM)
	{
		std::cout << "FLAC::Decoder::Stream::State " << state.as_cstring() << std::endl;
		// The current frame isn't useful, the decoder failed, and thus, the frame is unusable.
		priv_frame = nullptr;
	};

	// Copy the data that is already in the decoder buffer (the buffer contains only decoded data)
	if (priv_frame != nullptr  && bufferpos < priv_frame->header.blocksize) {
		bufferCopy();
	}

	// Decode frames until pos is > len
	while (pos < len)
	{
		if (process_single() == 0) {
			break;
		}

		state = get_state();

		if (state >= FLAC__STREAM_DECODER_END_OF_STREAM)
		{
			std::cout << "FLAC::Decoder::Stream::State " << state.as_cstring() << std::endl;
			priv_frame = nullptr;
			break;
		};
	};

	buf = nullptr;
}

unsigned int FLACStreamDecoder::bufferCopy()
{
	unsigned int i = 0, j, offset, channels, ulen;

	channels = std::min(priv_frame->header.channels, FLAC__MAX_CHANNELS);

	if (buf == nullptr) {
		for (i = 0; i < channels; i++)
		{
			if (ibuffer[i] == nullptr) {
				ibuffer[i] = new int32_t[priv_frame->header.blocksize];
			}

			memcpy(ibuffer[i], wbuffer[i], priv_frame->header.blocksize * sizeof(int32_t));
		};
		wbuffer = (const int32_t* const*)ibuffer;

		return 0;
	};

	ulen = std::min(len, priv_frame->header.blocksize);

	if (remaining % channels != 0)
	{
		std::cout << "Error: more remaining channels than channels on the stream (" << remaining << " remain, " << channels << " total channels)" << std::endl;
		return 0;
	};

	// If a bit-shift isn't performed, the volume will be extremely low. (This is a bug, right now the decoder will always upscale to 32bit bitrate)
	int shift = 32 - priv_frame->header.bits_per_sample;
	for (i = 0; i < ulen && remaining > 0; i++)
	{
		offset = pos + i * channels;

		if (bufferpos >= priv_frame->header.blocksize) {
			break;
		}

		if (offset + channels > len) {
			break;
		}

		for (j = 0; j < channels; j++) {
			buf[offset + j] = ((uint32_t)wbuffer[j][bufferpos]) << shift;
		}

		remaining -= channels;
		bufferpos++;
	}

	offset = i * channels;
	pos += i * channels;

	return offset;
}

 

The code is extremely brittle, but it works. After some major refactoring, it should be usable without manually having to set the MixFormat.

This topic is closed to new replies.

Advertisement