Introducing: A grammar system for games with lots of text

Started by
3 comments, last by choffstein 18 years, 9 months ago
Now, don't get too excited -- this isn't a Rob Loach quality announcement, but I thought a few of you might like this code I whipped up. I saw this idea on Gamasutra who knows how long ago, and cleaning up my webserver I found my C implementation of it and decided to redo it with C++. So what is it? Well, if you game is heavily text driven (RPGs, etc), you will realize that you cannot have a string for every single instance. "The troll drops 3 coins", "The troll drops 5 coins", "The troll drops an apple" -- and even if you do figure out some way to cut down on the number of strings, how do you deal with grammar rules? And that is where my grammar parser comes in. By giving each keyword a certain grammar state, you can create sentences that are shaped by the grammar rules. Example: visage[Pm] would mean the word visage is a male proper noun. So what are the different rules? P - proper noun p - plural v - starts with a vowel s - ends with s m - masculine f - feminine n - neuter So how do you create these sentences? Simple! By using a couple of sentence rules: $n - prints out the word at the index n #n - opens up a grammar parse for n Example: $0 played with #0{his[m]|her[f]|its[n]} $1. Now, if you throw it through the parser with "Corey[Pm]" being the first argument, and "computer[n]" being the second, the sentence ends up being: Corey played with his computer. Pretty neat, right? What you should note however is that the Grammar parsing system works on a "best fit" basis. So even though I didn't have [Pm] as an exact option, [m] was the best fit for "Corey[Pm]", so it used that. So, visage, how do you use it, you might ask? Simple, I wrote all the code for you. All you need to do is create an instance of the GrammarParser and use the MetaParse method. Like so:

GrammarParser gp;
cout << gp.MetaParse("The $0 threw #1{an [v]|[vp]||a }$1 at $2.", 3, "Gorilla[m]","apples[vp]", "visage[Pm]") << endl;
cout << gp.MetaParse("The #0{male[m]|female[f]|neuter[n]} $0 gave $1 coins to the pretty $2", 3, "troll[m]", "10[n]", "girl[f]");



Please note how important spacing is. Things are going to look funky unless you take into account all possibilities. If you look, you will see that I placed the $1 at the end of the parse statement for #1 because there was a possibility that there would be no space (only spaces on default and [v] case). The default case only runs if the is NO BETTER CASE. I hope some of you enjoy this. I will try to clean up the code a little more later and make it more readable. If you find any bugs, please let me know, and I will fix them ASAP.

#include <iostream>
#include <vector>
#include <sstream>
#include <string>
#include <stdarg.h>

using namespace std;

/* The word class -- defines the grammar rules for a word */
class Word
{
	public:
		string value;
		//Plural, neuter, feminine, masculine, plural, starts with a vowel, ends with s
		bool P, n, f, m, p, v, s;
		int requiredForMatch;
		
		Word() :
		P(false), n(false), f(false), m(false), p(false), v(false), s(false), requiredForMatch(0)
		{
		}
		
		int compare(Word t);
};

/* Compares two words based on their grammar rules and returns the number correct in the rules.
   7 is the best score you can recieve 
*/
int Word::compare(Word t)
{
	int numCorrect = 0;
	if(P && P == t.P) numCorrect++;
	if(n && n == t.n) numCorrect++;
	if(f && f == t.f) numCorrect++;
	if(m && m == t.m) numCorrect++;
	if(p && p == t.p) numCorrect++; 
	if(v && v == t.v) numCorrect++;
	if(s && s == t.s) numCorrect++;
	
	return numCorrect;
}

/* Our grammar parsing class */
class GrammarParser
{
	private:
		string currentLine; //our current line
		int currentPosition; //our current position in the line
		int currentArgumentParsing;
		vector<Word*> arguments;

		int getNextInt();
		string ReturnWordLiteral();
		void getArgumentToParse();
		bool isToken(char c);
		//char getNextToken();
		char getChar();
		char getNextChar();
		string returnFrom(string word, int start, int end);
		bool isANumber(char c);
		int getLocationOfNext(string word, int start, char c);
		int getClosestDelimeter(string word, int start);
		Word *makeWordFromString(string word);
		Word *getNextWord();

	public:
		GrammarParser();
		~GrammarParser();
		string MetaParse(string line, int argcount, ...);
};

GrammarParser::GrammarParser() : currentLine(""), currentPosition(0), currentArgumentParsing(-1)
{
}

GrammarParser::~GrammarParser()
{
	for(int i = 0; i < arguments.size(); ++i)
		delete arguments;
	arguments.clear();
}

/* Checks if the current character is a token or not. */
bool GrammarParser::isToken(char c)
{
	if(c == '$' || c == '#' || c == '{' || c == '}' || c == '|' || c == '[' || c == ']')
		return true;
	return false;
}

/* Gets the current char.  '0' if it is the end of the line. */
char GrammarParser::getChar()
{
	if(currentPosition == currentLine.size()) return '0';
	return currentLine[currentPosition];
}

/* Gets the next character.  '0' if it is the end of the line */
char GrammarParser::getNextChar()
{
	if(currentPosition == currentLine.size() - 1) return '0';
	return currentLine[++currentPosition];
}

/* returns a substring from start to end in the string word */
string GrammarParser::returnFrom(string word, int start, int end)
{
	if(end > word.size())
		return NULL;
	
	//special case -- there is nothing there
	if(end-start == 0)
		return "";
	
	string s(word, start, end-start);
	return s;
}

/* Gets the index location of the next "char" in word, starting at the start index value */
int GrammarParser::getLocationOfNext(string word, int start, char c)
{
	for(int i = start; i < word.size(); ++i)
	{
		if(word == c)
			return i;
	}
	return -1;
}

/* Checks if char is a number or not */
bool GrammarParser::isANumber(char c)
{
	if(c == '0' || c=='1' || c == '2' || c=='3' || c=='4' || c=='5' || c=='6' || c=='7' || c == '8' || c == '9')
		return true;
	return false;
}

/* Returns the location of the closest delimeter for an index value -- basically, any non number */
int GrammarParser::getClosestDelimeter(string word, int start)
{
	for(int i = start; i < word.size(); ++i)
	{
		if(!isANumber(word))			
			return i;
	}
	return -1;
}

/* Returns a word class from a string */
Word *GrammarParser::makeWordFromString(string word)
{
	Word *ourWord = new Word();
	
	//special case
	if(word.size() == 0)
	{
		return ourWord;
	}
	
	/* This section checks to see where the word ends, and where the rules begin */
	int openParseReq = getLocationOfNext(word, 0, '[');
	int closeParseReq = getLocationOfNext(word, openParseReq, ']');
	
	/* If there is no [, it is probably a default case, which means there will either be a | or } coming up */
	if(openParseReq == -1)
	{
	    /* Find the next ending location */
		openParseReq = getLocationOfNext(word, 0, '}');
		int openParseReq2 = getLocationOfNext(word, 0, '|');
		/* Find the closer one. */
		if(openParseReq2 > 0 && openParseReq2 < openParseReq) 
			openParseReq = openParseReq2;
	}
	
	/* Get the value of the word */
	ourWord->value = returnFrom(word, 0, openParseReq);
	
	//Now set the rules, assuming there are rules (aka, not a default case)
	if(closeParseReq > 0)
	{
		for(openParseReq; openParseReq != closeParseReq; ++openParseReq)
		{
			switch(word[openParseReq])
			{
				case 'P':{ ourWord->P = true; ourWord->requiredForMatch++; } break;
				case 'n':{ ourWord->n = true; ourWord->requiredForMatch++; } break;
				case 'f':{ ourWord->f = true; ourWord->requiredForMatch++; } break;
				case 'm':{ ourWord->m = true; ourWord->requiredForMatch++; } break;
				case 'p':{ ourWord->p = true; ourWord->requiredForMatch++; } break;
				case 'v':{ ourWord->v = true; ourWord->requiredForMatch++; } break;
				case 's':{ ourWord->s = true; ourWord->requiredForMatch++; } break;
				default: {} break;
			}
		}
	}
	
	return ourWord;
}

int GrammarParser::getNextInt()
{
	int endLiteral = getClosestDelimeter(currentLine, currentPosition+1); //white space or puncuation ends a number
	string number;
	if(endLiteral == -1)
		number = returnFrom(currentLine, currentPosition+1, currentLine.size());
	else
		number = returnFrom(currentLine, currentPosition+1, endLiteral+1);
	int value = atoi(number.c_str());
					
	if(endLiteral != -1)
		currentPosition = endLiteral-1;
	else
		currentPosition = currentLine.size();
		
	return value;
}

string GrammarParser::ReturnWordLiteral()
{
	int value = getNextInt();
	if(value < arguments.size())
		return arguments[value]->value;
	
	return "";
}

void GrammarParser::getArgumentToParse()
{
	int value = getNextInt();
	currentArgumentParsing = value;
}

Word *GrammarParser::getNextWord()
{
	int endMatch = getLocationOfNext(currentLine, currentPosition+1, '}');
	int endMatch2 = getLocationOfNext(currentLine, currentPosition+1, '|');
	if(endMatch2 > 0 && endMatch2 < endMatch) 
		endMatch = endMatch2;
											
	string word = returnFrom(currentLine, currentPosition+1, endMatch+1);
	Word *temp = makeWordFromString(word);
	return temp;
}

//our big meta parse function
string GrammarParser::MetaParse(string line, int argcount, ...)
{
	//make sure there is nothing left over from an old parse
	for(int i = 0; i < arguments.size(); ++i)
		delete arguments;
	arguments.clear();
	currentLine.clear();
	
	ostringstream finalLine;
	
	/* Here we create a vector of Word classes based on the arguments we gave */
	va_list args;
	va_start(args, argcount); //set a pointer to the start of the list
	for(int currArg = 0; currArg < argcount; ++currArg)
	{
		string s = va_arg(args, char *);
		arguments.push_back(makeWordFromString(s));
	}
	va_end(args);
	
	/* Now that we have the list of arguments, we can parse the actual line */
	currentLine = line;
	currentPosition = -1;
	//while we have a next token
	char c;
	while((c = getNextChar()) != '\0' && c != '0')
	{
		//figure out which token it is
		switch(c)
		{
			case '$': //this introduces a word literal.  The next value, which is a number, tells which argument to use
			{
				finalLine << ReturnWordLiteral();
			} break;
			case '#': //this introduces a parse -- we need to find the current argument to parse
			{
				getArgumentToParse(); //figure out which argument we should be parsing for
			} break;
			case '{': //this opens a parsing statement
			{
				//make sure we are actually parsing something
				if(currentArgumentParsing != -1)
				{
					//The choices we have...
					vector<Word*> choices;
					Word *defaultChoice = NULL;
					int indexClosest = -1;
					int numForClosest = 0;
					char c = getChar();
					//while we are not at the end of the parsing statement
					for(c; c != '}'; c = getChar())
					{
						Word *temp = getNextWord();
						if(temp->requiredForMatch == 0)
						{
							//default case if there isn't a match
							defaultChoice = temp;
						}
						else
						{
							choices.push_back(temp);
							int compareValue = temp->compare(*arguments[currentArgumentParsing]);
							if(compareValue > numForClosest)
							{
								numForClosest = compareValue;
								indexClosest = choices.size() - 1;
							}
						}
						
						//find the next location for our pointer
						int nextLoc = getLocationOfNext(currentLine, currentPosition+1, '}');
						int nextLoc2 = getLocationOfNext(currentLine, currentPosition+1, '|');
						if(nextLoc2 > 0 && nextLoc2 < nextLoc) 
							nextLoc = nextLoc2;
							
						currentPosition = nextLoc;
					}
					
					if(indexClosest == -1)
						finalLine << defaultChoice->value;
					else
						finalLine << choices[indexClosest]->value;
					currentArgumentParsing = -1;
					
					for(int i = 0; i < choices.size(); ++i)
						delete choices;
					choices.clear();
					if(defaultChoice)
						delete defaultChoice;
				}

			} break;
			default:
			{
				if(currentArgumentParsing == -1)
				{
					if(c == '\\' && currentPosition != currentLine.size())
						c = getNextChar();
					finalLine << c;
				} 
			} break;
		}
	}

	return finalLine.str();
}



Size: 352 Lines [Edited by - visage on June 22, 2005 8:12:41 PM]
Advertisement
Interesting. On a related note, here is a good article that talks about something very similar to what you are doing here.

http://www.gamasutra.com/resource_guide/20030916/heimburg_01.shtml
Infact, that is the exact article I got the idea from!

Maybe I will implement that '!' idea.
Also, I forgot to put in the ability to use escape sequences, so they are in there now (I edited the original code a bit).

The code is still a bit sloppy, but I tried to clean it up some more for you. Easier to debug, easier to follow what is going on.

Anyone think they have any use for this? At least it was fun to code.
You should seperate it into a header/source file it would be easier to implement. [smile]
Request granted.

MetaParser.h
#ifndef __METAPARSER_H__#define __METAPARSER_H__#include <iostream>#include <vector>#include <sstream>#include <string>#include <stdarg.h>using namespace std;/* The word class -- defines the grammar rules for a word */class Word{	public:		string value;		//Plural, neuter, feminine, masculine, plural, starts with a vowel, ends with s		bool P, n, f, m, p, v, s;		int requiredForMatch;				Word() :		P(false), n(false), f(false), m(false), p(false), v(false), s(false), requiredForMatch(0)		{		}				int compare(Word t);};/* Our grammar parsing class */class GrammarParser{	private:		string currentLine; //our current line		int currentPosition; //our current position in the line		int currentArgumentParsing;		vector<Word*> arguments;		int getNextInt();		string ReturnWordLiteral();		void getArgumentToParse();		bool isToken(char c);		//char getNextToken();		char getChar();		char getNextChar();		string returnFrom(string word, int start, int end);		bool isANumber(char c);		int getLocationOfNext(string word, int start, char c);		int getClosestDelimeter(string word, int start);		Word *makeWordFromString(string word);		Word *getNextWord();	public:		GrammarParser();		~GrammarParser();		string MetaParse(string line, int argcount, ...);};#endif


MetaParser.cpp
#include "MetaParser.h"/* Compares two words based on their grammar rules and returns the number correct in the rules.   7 is the best score you can recieve */int Word::compare(Word t){	int numCorrect = 0;	if(P && P == t.P) numCorrect++;	if(n && n == t.n) numCorrect++;	if(f && f == t.f) numCorrect++;	if(m && m == t.m) numCorrect++;	if(p && p == t.p) numCorrect++; 	if(v && v == t.v) numCorrect++;	if(s && s == t.s) numCorrect++;		return numCorrect;}GrammarParser::GrammarParser() : currentLine(""), currentPosition(0), currentArgumentParsing(-1){}GrammarParser::~GrammarParser(){	for(int i = 0; i < arguments.size(); ++i)		delete arguments;	arguments.clear();}/* Checks if the current character is a token or not. */bool GrammarParser::isToken(char c){	if(c == '$' || c == '#' || c == '{' || c == '}' || c == '|' || c == '[' || c == ']')		return true;	return false;}/* Gets the current char.  '0' if it is the end of the line. */char GrammarParser::getChar(){	if(currentPosition == currentLine.size()) return '0';	return currentLine[currentPosition];}/* Gets the next character.  '0' if it is the end of the line */char GrammarParser::getNextChar(){	if(currentPosition == currentLine.size() - 1) return '0';	return currentLine[++currentPosition];}/* returns a substring from start to end in the string word */string GrammarParser::returnFrom(string word, int start, int end){	if(end > word.size())		return NULL;		//special case -- there is nothing there	if(end-start == 0)		return "";		string s(word, start, end-start);	return s;}/* Gets the index location of the next "char" in word, starting at the start index value */int GrammarParser::getLocationOfNext(string word, int start, char c){	for(int i = start; i < word.size(); ++i)	{		if(word == c)			return i;	}	return -1;}/* Checks if char is a number or not */bool GrammarParser::isANumber(char c){	if(c == '0' || c=='1' || c == '2' || c=='3' || c=='4' || c=='5' || c=='6' || c=='7' || c == '8' || c == '9')		return true;	return false;}/* Returns the location of the closest delimeter for an index value -- basically, any non number */int GrammarParser::getClosestDelimeter(string word, int start){	for(int i = start; i < word.size(); ++i)	{		if(!isANumber(word))						return i;	}	return -1;}/* Returns a word class from a string */Word *GrammarParser::makeWordFromString(string word){	Word *ourWord = new Word();		//special case	if(word.size() == 0)	{		return ourWord;	}		/* This section checks to see where the word ends, and where the rules begin */	int openParseReq = getLocationOfNext(word, 0, '[');	int closeParseReq = getLocationOfNext(word, openParseReq, ']');		/* If there is no [, it is probably a default case, which means there will either be a | or } coming up */	if(openParseReq == -1)	{	    /* Find the next ending location */		openParseReq = getLocationOfNext(word, 0, '}');		int openParseReq2 = getLocationOfNext(word, 0, '|');		/* Find the closer one. */		if(openParseReq2 > 0 && openParseReq2 < openParseReq) 			openParseReq = openParseReq2;	}		/* Get the value of the word */	ourWord->value = returnFrom(word, 0, openParseReq);		//Now set the rules, assuming there are rules (aka, not a default case)	if(closeParseReq > 0)	{		for(openParseReq; openParseReq != closeParseReq; ++openParseReq)		{			switch(word[openParseReq])			{				case 'P':{ ourWord->P = true; ourWord->requiredForMatch++; } break;				case 'n':{ ourWord->n = true; ourWord->requiredForMatch++; } break;				case 'f':{ ourWord->f = true; ourWord->requiredForMatch++; } break;				case 'm':{ ourWord->m = true; ourWord->requiredForMatch++; } break;				case 'p':{ ourWord->p = true; ourWord->requiredForMatch++; } break;				case 'v':{ ourWord->v = true; ourWord->requiredForMatch++; } break;				case 's':{ ourWord->s = true; ourWord->requiredForMatch++; } break;				default: {} break;			}		}	}		return ourWord;}int GrammarParser::getNextInt(){	int endLiteral = getClosestDelimeter(currentLine, currentPosition+1); //white space or puncuation ends a number	string number;	if(endLiteral == -1)		number = returnFrom(currentLine, currentPosition+1, currentLine.size());	else		number = returnFrom(currentLine, currentPosition+1, endLiteral+1);	int value = atoi(number.c_str());						if(endLiteral != -1)		currentPosition = endLiteral-1;	else		currentPosition = currentLine.size();			return value;}string GrammarParser::ReturnWordLiteral(){	int value = getNextInt();	if(value < arguments.size())		return arguments[value]->value;		return "";}void GrammarParser::getArgumentToParse(){	int value = getNextInt();	currentArgumentParsing = value;}Word *GrammarParser::getNextWord(){	int endMatch = getLocationOfNext(currentLine, currentPosition+1, '}');	int endMatch2 = getLocationOfNext(currentLine, currentPosition+1, '|');	if(endMatch2 > 0 && endMatch2 < endMatch) 		endMatch = endMatch2;												string word = returnFrom(currentLine, currentPosition+1, endMatch+1);	Word *temp = makeWordFromString(word);	return temp;}//our big meta parse functionstring GrammarParser::MetaParse(string line, int argcount, ...){	//make sure there is nothing left over from an old parse	for(int i = 0; i < arguments.size(); ++i)		delete arguments;	arguments.clear();	currentLine.clear();		ostringstream finalLine;		/* Here we create a vector of Word classes based on the arguments we gave */	va_list args;	va_start(args, argcount); //set a pointer to the start of the list	for(int currArg = 0; currArg < argcount; ++currArg)	{		string s = va_arg(args, char *);		arguments.push_back(makeWordFromString(s));	}	va_end(args);		/* Now that we have the list of arguments, we can parse the actual line */	currentLine = line;	currentPosition = -1;	//while we have a next token	char c;	while((c = getNextChar()) != '\0' && c != '0')	{		//figure out which token it is		switch(c)		{			case '$': //this introduces a word literal.  The next value, which is a number, tells which argument to use			{				finalLine << ReturnWordLiteral();			} break;			case '#': //this introduces a parse -- we need to find the current argument to parse			{				getArgumentToParse(); //figure out which argument we should be parsing for			} break;			case '{': //this opens a parsing statement			{				//make sure we are actually parsing something				if(currentArgumentParsing != -1)				{					//The choices we have...					vector<Word*> choices;					Word *defaultChoice = NULL;					int indexClosest = -1;					int numForClosest = 0;					char c = getChar();					//while we are not at the end of the parsing statement					for(c; c != '}'; c = getChar())					{						Word *temp = getNextWord();						if(temp->requiredForMatch == 0)						{							//default case if there isn't a match							defaultChoice = temp;						}						else						{							choices.push_back(temp);							int compareValue = temp->compare(*arguments[currentArgumentParsing]);							if(compareValue > numForClosest)							{								numForClosest = compareValue;								indexClosest = choices.size() - 1;							}						}												//find the next location for our pointer						int nextLoc = getLocationOfNext(currentLine, currentPosition+1, '}');						int nextLoc2 = getLocationOfNext(currentLine, currentPosition+1, '|');						if(nextLoc2 > 0 && nextLoc2 < nextLoc) 							nextLoc = nextLoc2;													currentPosition = nextLoc;					}										if(indexClosest == -1)						finalLine << defaultChoice->value;					else						finalLine << choices[indexClosest]->value;					currentArgumentParsing = -1;										for(int i = 0; i < choices.size(); ++i)						delete choices;					choices.clear();					if(defaultChoice)						delete defaultChoice;				}			} break;			default:			{				if(currentArgumentParsing == -1)				{					if(c == '\\' && currentPosition != currentLine.size())						c = getNextChar();					finalLine << c;				} 			} break;		}	}	return finalLine.str();}

This topic is closed to new replies.

Advertisement