Sign in to follow this  
vaneger

run length encoder/ decoder not working right

Recommended Posts

/// One run produced by RLE(): a value plus the number of times it repeats back to back.
template<typename T>
struct CRLE
{
    T data;              ///< the repeated value
    unsigned char byte;  ///< run length, always in the range 1..255
};

/// Naive run-length encoder.
///
/// Walks `vec` once and emits one CRLE entry per maximal stretch of equal,
/// adjacent elements. A stretch longer than 255 elements is split into several
/// entries because the count is stored in a single unsigned char.
///
/// @param vec  the sequence to encode; may be empty
/// @return     the runs in input order; the counts sum to vec.size()
template<typename T>
std::vector<CRLE<T>> RLE(const std::vector<T> &vec)
{
	const std::size_t maxRun = 255; // largest count that fits in CRLE::byte
	std::vector<CRLE<T>> result;

	std::size_t i = 0;
	while(i < vec.size())
	{
		// Extend j to one past the end of the run that starts at i.
		std::size_t j = i + 1;
		while(j < vec.size() && (j - i) < maxRun && vec[j] == vec[i])
			j++;

		CRLE<T> run;
		run.data = vec[i];
		run.byte = static_cast<unsigned char>(j - i);
		result.push_back(run);

		// Resume exactly at the first element that was not consumed; there is
		// no loop increment on top of this, so nothing is skipped.
		i = j;
	}
	return result;
}

        // Run-length encode fileData, then stream the encoded items to `file`.
        // Intended layout (as described further down the thread): a run is written as a 0
        // marker, the run length, then the repeated value; a stretch of single items is
        // written as its count followed by the items themselves.
        compressed = RLE<int>(fileData);

	for(x =0;x<compressed.size();x++)
	{
		j = x; // j scans forward from x to find where the stretch of count-1 items ends
		// NOTE(review): `unsigned char(1)` is not standard C++ (a functional cast needs a
		// single-word type name); static_cast<unsigned char>(1) is the portable spelling.
		if(compressed[x].byte > unsigned char(1))
			// Run: 0 marker, run length, repeated value.
			// NOTE(review): data is an int here (RLE<int>), so operator<< presumably formats
			// it as decimal text rather than raw bytes -- confirm against the type of `file`.
			file<<static_cast<unsigned char> (0)<<compressed[x].byte<<compressed[x].data;
		else
		{
			// Advance j past consecutive items whose count is 1, clamping to the last index.
			while(compressed[j].byte == 1)
			{
				j++;
				if(j >= compressed.size())
				{
					j--;
					break;
				}
			}
			// NOTE(review): j+1 is derived from an absolute index, not from the number of
			// single items, and presumably goes out as decimal text if j is an integer; a
			// later post in the thread switches to writing a single byte here.
			file<<j+1;
			n = j;
			// NOTE(review): j >= x at this point, so this loop body is skipped whenever j
			// advanced; a later post in the thread changes the condition to j >= x.
			while(j <= x)
			{
				file<<compressed[x-j].data;
				j--;
			}
			x+=n; // NOTE(review): adds the absolute index n, not the number of items consumed
		}
	}
	file.close();

	// Decode pass: read the encoded stream from "delta.txt" into fileArray, then write the
	// collected values to "deltaX.bmp".
	// NOTE(review): both streams are opened without ios::binary.
	vector<int> fileArray;
        int mSize = sizeof(int); // amount subtracted from `size` for every value read into n
	ifstream input;
	ofstream output;
	output.open("deltaX.bmp");
	size = get_file_size("delta.txt");
	cout<<"file size is "<<size<<endl;
	input.open("delta.txt");
	// `size` is used as a countdown of the remaining input; the loop exits once it drops to
	// zero or below.
	while(size >=0 )
	{
		input>>readD; // leading count / marker
		size--;
		if(readD == 0)
		{
			// Marker 0: the next value read is the count.
			input>>readD;
			size--;
			// NOTE(review): this reads `readD` separate values instead of reading one value
			// and repeating it, so it behaves exactly like the else branch below.
			for(x = static_cast<int> (readD); x > 0 ; x--)
			{
				input>>n;
				size-=mSize;
				fileArray.push_back(n);
				if(size <= 0)
					break;
			}
		}
		else
		{
			// Non-zero count: read that many values one by one.
			for(x = static_cast<int> (readD); x > 0 ; x--)
			{
				input>>n;
				size-=mSize;
				fileArray.push_back(n);
				if(size <= 0)
					break;
			}
		}
				if(size <= 0)
					break;
	}
	cout<<"array size "<<fileArray.size()<<endl;
	// NOTE(review): operator<< on an int writes its decimal text with no separator, not the
	// raw bytes of the value.
	for(x = 0;x<fileArray.size();x++)
		output<<fileArray[x];

deltaX.bmp ends up being 70 bytes instead of 7 KB like the source file (which is read into fileData correctly prior to this section of code).

Share this post


Link to post
Share on other sites
Your code makes very little sense and is horribly documented. Either explain what should be happening in comments and/or use much better variable names. Also, a general overview of how it should be working would help.

Share this post


Link to post
Share on other sites
RLE is a naive run length encoding function.

The second section of code parses the RLE data and outputs it to a file. If there is a batch of single data items instead of a run, the output should be the length until the next run (i.e. if you have abcdefffff then the output should be 5 followed by abcde). If you have a run, the output should be 0 followed by the length of the run followed by the item that is repeated (given abcdefffff, the output is 05f). The total output for that given string should be 5abcde05f.

Share this post


Link to post
Share on other sites

while(j <= x)
// needed to be
while(j >= x)

//also I changed
i = j;
//to
if(freq > 1)
i = j;
// also in the RLE function end was wrong and needed to be
// array.size() -1 instead of -2



Now the function works correctly for reading and outputting RLE data. I still need to fix the part that reconstructs the original file.

Share this post


Link to post
Share on other sites

n = 0; // value written as the "run follows" marker
// output 0 followed by size of run (in bytes) followed by data
// NOTE(review): the condition inspects compressed[j] while the write uses compressed[x];
// confirm that j and x refer to the same item at this point.
if(static_cast<int> (compressed[j].byte) > 1)
file<<static_cast<unsigned char>(n)<<compressed[x].byte<<compressed[x].data;



// Decode pass: read "RLE.txt" and append one fileArray entry per byte read.
n = 0; // marker value that readD is compared against below
ifstream input;
input.open("RLE.txt");
size = get_size("RLE.txt");
unsigned char readD;
// `size` counts down as bytes are consumed.
while(size >=0 )
{
// NOTE(review): operator>> on an unsigned char skips whitespace by default, so encoded
// bytes that happen to be whitespace characters would be dropped.
input>>readD;
size--;
/*
If readD is 0 then a run of data follows;
therefore read in the next byte, which
should be the size of the run as per the
code section above.
*/

if(static_cast<int> (readD) == n)
{
// NOTE(review): `size` is not decremented for this extra read.
input>>readD;
cout<<"run found"<<endl;
}
if(static_cast<int> (readD) > 0)
{
// x holds the count; readD is reused for each byte read inside the loop.
// NOTE(review): after a run marker this still reads `count` separate bytes; the run's
// symbol is never read once and emitted `count` times.
for(x = static_cast<int> (readD);x > 0;x--)
{
input>>readD;
size--;
temp.byte = static_cast<unsigned char> (1);
temp.data = static_cast<char> (readD);
fileArray.push_back(temp);
if(size <= 0)
break;
}
}
}


I'm not sure how to get this to work properly. For a string like "abcdefffff" the output should be "5abcde05f" but instead I get "5abcdeff".

I specifically try to output a 0 to signify an upcoming run of data but it doesn't seem to work right.

Share this post


Link to post
Share on other sites
ASCII treats digits as just other symbols, the same as letters. They have their own numeric values that are not the same as the numeric value of the digit. For example, a char that represents the symbol '0', interpreted as an integer value, is actually the value 48.

For your purposes, you probably do not want to try to make the output look like a digit, because then you can only count up to 9. Instead, just write the run length as a byte, and accept the fact that in general, it will look (when interpreted as text) like some weird symbol.

So "abcdefffff" gets encoded as '<byte with value 5>' 'a' 'b' 'c' 'd' 'e' '<byte with value 0>' '<byte with value 5>' 'f'. You decode that as follows:

- Read a byte.
- It's not zero, so we read the corresponding number of non-run bytes.
- Read a byte.
- This time it is zero, so read the next two bytes.
- The first byte has value 5 (NOT a representation of the digit '5'), and the second byte is the symbol 'f', so output 'f' 5 times.

Thus our decoder has to look like:


While (non_run_count <- byte from encoded data):
If non_run_count is zero:
run_count <- byte from encoded data.
run_symbol <- byte from encoded data.
Do run_count times:
emit run_symbol.
Else:
Do non_run_count times:
emit byte from encoded data.


Right now you're not reading run_symbol or doing anything with it, just leaving it for the next time through the loop. You're not even decrementing the size counter there, which causes other problems. Do not try to handle files by any technique involving counting their length ahead of time. This way lies pain and misery. Instead, just process the data until you run out. This can be done in a simple and idiomatic way: just use the read operation as the condition for your while loop.

Also, why are you using the variable 'n' to hold the constant 0? And why are you testing compressed[j] and then outputting data from compressed[x]?

Share this post


Link to post
Share on other sites

//from the original post
// the lines
file<<j;
//changed to
file<<unsigned char(j);





it works fine now, I accidentally outputted the length of a non run in numeric form as opposed to ascii form.

*Zahlman* I'm using size because after I get this working on a byte level I'm going to expand the RL encoding to work on strings and therefore need to read in the correct amount of data even in spite of variable length strings.

Share this post


Link to post
Share on other sites
OK, so it worked on one example, but when I moved on to trying another it did not work.


// Write the encoded stream: a stretch of count-1 items goes out as <count><items...>,
// a run goes out as <0><length><value>.
for(x =0;x<compressed.size();x++)
{
int nonRun = 0; // number of consecutive count-1 items starting at x
if(int(compressed[x].byte) == 1)
{
// Count how many count-1 items follow, stopping at the end of `compressed`.
while(int(compressed[x+nonRun].byte) == 1)
{
nonRun++;
if(x+nonRun >= compressed.size())
break;
}
// NOTE(review): `unsigned char(nonRun)` is not standard C++ spelling (use static_cast),
// and the value wraps modulo 256 if the stretch is longer than 255 items.
file<<unsigned char(nonRun);
nonRun+=x; // nonRun now holds the index one past the stretch
while(x!=nonRun)
{
file<<compressed[x].data;
x++;
}
// NOTE(review): x now indexes the item right after the stretch, and the for loop's x++
// then steps over it, so that item is never written.
}
else
file<<unsigned char(0)<<compressed[x].byte<<compressed[x].data;
}
file.close();


// Decode pass: expand "delta.txt" into "deltaX.txt".
// NOTE(review): both streams are opened without ios::binary.
ifstream input;
ofstream output;
output.open("deltaX.txt");
input.open("delta.txt");

unsigned char tempC;
// NOTE(review): eof() is tested before the read, so a failed final read is still processed
// with the stale value of tempC.
while(!input.eof())
{
// NOTE(review): operator>> on an unsigned char skips whitespace by default, so encoded
// bytes such as 9, 10 or 32 would be dropped.
input>>tempC;
cout<<"numeric "<<int(tempC)<<endl;
if(int(tempC) >= 1)
{
// Literal stretch: copy the next tempC bytes through unchanged. x holds the count while
// tempC is reused for each byte.
for(x = int(tempC);x>0;x--)
{
input>>tempC;
cout<<x<<" "<<tempC<<endl;
output<<tempC;
}
}
else if(int(tempC) == 0)
{
// Run marker: the next byte is the run length.
input>>tempC;
cout<<"zero "<<tempC<<endl;
// NOTE(review): the repeated symbol is never read; this writes the length byte itself
// tempC times, and the symbol is left to be treated as the next count.
for(x = int(tempC);x>0;x--)
output<<tempC;
}
}



I'm currently testing using a text file with abcccdeeeeefffff in it. Proper output should be (value of 2)ab(value of 0)(value of 3)c(value of 1)d(value of 0)(value of 5)e(value of 0)(value of 5)f

I only see a,b and e the other letters aren't there, though there are some symbols.

Share this post


Link to post
Share on other sites
Quote:
Original post by vaneger
*Zahlman* I'm using size because after I get this working on a byte level I'm going to expand the RL encoding to work on strings and therefore need to read in the correct amount of data even in spite of variable length strings.


No, you don't. Put the string into a stringstream, and read it until there's no more data in the stream.

You don't even need to write new code. Just make the encoding and decoding functions accept istream&.

Quote:
I only see a,b and e the other letters aren't there, though there are some symbols.


Use a hex editor so you can determine exactly what is in the file.

Share this post


Link to post
Share on other sites

Create an account or sign in to comment

You need to be a member in order to leave a comment

Create an account

Sign up for a new account in our community. It's easy!

Register a new account

Sign in

Already have an account? Sign in here.

Sign In Now

Sign in to follow this