正規表現 R|S RS R* R+ のパーサ

regex.h

#ifndef _REGEX_INCLUDE_CHECK
#define _REGEX_INCLUDE_CHECK

#include <iostream>
using namespace std;

#define eleft e.child.left
#define eright e.child.right
#define ech e.ch

enum op_t {
  e_union,
  e_concat,
  e_closure,
  e_pclosure,
  e_empty,
  e_char
};

struct Leaf {
  op_t op;
  /*
   * if op == "char", then the param of this union is character
   * else , then the param of this union is the struct that represents
   * a left tree and a right tree.
   */
  union Entity {
	char ch;
	struct {
	  Leaf *left;
	  Leaf *right;
	} child;
  } e;
};

class Stree {
  char current;
  char *inspectedStr, *baseStr;
  Leaf *root;

  void perror(const char * ptr);
  char getNextCurrent();
  char getBackCurrent();
  void setNextCurrent();
  Leaf* growTree(Leaf* left, Leaf* right, op_t op);
  Leaf* regexp();
  Leaf* regItr1();
  Leaf* regItr2();
  Leaf* regItr3();
  void showItem(Leaf *leaf, int count);

public:
  Stree(char *setch) {
	inspectedStr = setch;
	baseStr = setch;
	current = *(inspectedStr);
	root = regexp();
  }
  
  Leaf* getRoot() { return root; }
  void showAll();
};

#endif

Regex.cpp

#include "regex.h"

inline char Stree::getNextCurrent()
{
  return *(inspectedStr+1);
}

inline char Stree::getBackCurrent()
{
  if (baseStr < (inspectedStr-1))
	return '\0';
  return *(inspectedStr-1);
}

inline void Stree::setNextCurrent()
{
  current = *(++inspectedStr);
}

/*
 * This function is mainly dealing with "union".
 */
Leaf* Stree::regexp()
{
  Leaf *xLeaf, *yLeaf;

  xLeaf = regItr1();

  if (current == '|') {
	setNextCurrent();
	yLeaf = regexp();
	xLeaf = growTree(xLeaf, yLeaf, e_union);
  }

  return xLeaf;
}

/*
 * This function is mainly dealing with "concat".
 */
Leaf* Stree::regItr1()
{
  Leaf *xLeaf, *yLeaf;

  xLeaf = regItr2();

  if (xLeaf == NULL)
	return NULL;

  if (current != '\0' && current != '|' && current != ')') {
	yLeaf = regItr1();
	xLeaf = growTree(xLeaf, yLeaf, e_concat);
  }

  return xLeaf;
}

Leaf* Stree::regItr2()
{
  Leaf *xLeaf;

  xLeaf = regItr3();

  if (xLeaf == NULL)
	return NULL;

  if (current == '*') {
	xLeaf = growTree(xLeaf, NULL, e_closure);
	setNextCurrent();
  } else if (current == '+') {
	xLeaf = growTree(xLeaf, NULL, e_pclosure);
	setNextCurrent();
  }

  return xLeaf;
}

Leaf* Stree::regItr3()
{
  Leaf *xLeaf = new Leaf();
  xLeaf->eleft = NULL;
  xLeaf->eright = NULL;

  if (current == '(') {
	setNextCurrent();
	xLeaf = regexp();
	if (current != ')')
	  perror("parse error");
	else
	  setNextCurrent();
  } else if (isalpha(current)) {
	xLeaf->op = e_char;
	xLeaf->ech = current;
	setNextCurrent();
  } else {
	if (current == '|') {
	  xLeaf->op = e_empty;
	  setNextCurrent();
	} else if (getBackCurrent() == '|'){
	  xLeaf->op = e_empty;
	  setNextCurrent();
	} else if (current == '\0') {
	  return NULL;
	} else {
	  perror("sentence error");
	}
  }

  return xLeaf;
}

Leaf* Stree::growTree(Leaf *left, Leaf *right, op_t op)
{
  Leaf *leaf = new Leaf();
  leaf->op = op;
  leaf->eleft = left;
  leaf->eright = right;

  return leaf;
}

void Stree::showAll()
{
  showItem(root, 0);
}

void Stree::showItem(Leaf *leaf, int count)
{
  if (leaf == NULL)
	return;

  if (leaf->op != e_char) {
	for (int i=0; i<=count; i++)
	  cout << " ";
	cout << "left" << endl;
	showItem(leaf->eleft, count+1);
  }

  for (int i=0; i<=count; i++)
	cout << " ";
  switch (leaf->op) {
  case e_union:
	cout << '|';
	break;
  case e_concat:
	break;
  case e_closure:
	cout << '*';
	break;
  case e_pclosure:
	cout << '+';
	break;
  case e_empty:
	cout << "empty" << endl;;
	return;
  case e_char:
	cout << leaf->ech << endl;;
	return;
  }
  cout << endl;

  for (int i=0; i<=count; i++)
	cout << " ";
  cout << "right" << endl;
  showItem(leaf->eright, count+1);
}

void Stree::perror(const char * ptr)
{
  cout << ptr << endl;
  exit(1);
}

nfa.h

#ifndef _NFA_INCLUDE_CHECK
#define _NFA_INCLUDE_CHECK

#include <iostream>
#include "regex.h"

const int NTRANS_MAX = 128;
const int NNODE_MAX = 128;
const char EMPTY = -1;
const int END = -1;

struct Ntrans {
  char ch;
  int to;
  int from;
};

struct Nnode {
  char ch;
  int to;
  Nnode* next;
};


class NfaClass {
  Ntrans* ntrans[NTRANS_MAX];
  int nodeNum;
  int ntransNum;
  int nnodeMaxNum;

  void nfaInitial();
  void nfaIter(Stree& st, Leaf* leaf, int start, int last);
  void createTrans(int from, int to, char ch);
  void setTransToNode();
  int getNewNode() { return ++nodeNum; }

public:
  Nnode* nnode[NNODE_MAX];

  NfaClass(Stree& st) {

	nfaInitial();

	Leaf* leaf = st.getRoot();
	nfaIter(st, leaf, 0, 1);
	setTransToNode();
  }
  ~NfaClass();
  Nnode* getRoot();
  void showNtransAll();
  void showNnodeAll();
};

#endif

nfa.cpp

#include "nfa.h"

void NfaClass::nfaIter(Stree& st, Leaf* leaf, int start, int last)
{
  switch (leaf->op) {
  case e_union:
	{
	  int newNodeA, newNodeB, newNodeC, newNodeD;
	  newNodeA = getNewNode();
	  newNodeB = getNewNode();
	  newNodeC = getNewNode();
	  newNodeD = getNewNode();
	  
	  createTrans(start, newNodeA, EMPTY);
	  createTrans(newNodeB, last, EMPTY);
	  createTrans(start, newNodeC, EMPTY);
	  createTrans(newNodeD, last, EMPTY);

	  nfaIter(st, leaf->eleft, newNodeA, newNodeB);
	  nfaIter(st, leaf->eright, newNodeC, newNodeD);
	}
	break;

  case e_concat:
	{
	  int newNode = getNewNode();
	  nfaIter(st, leaf->eleft, start, newNode);
	  nfaIter(st, leaf->eright, newNode, last);
	}
	break;

  case e_closure:
	{
	  int newNodeA, newNodeB;
	  newNodeA = getNewNode();
	  newNodeB = getNewNode();
	  
	  createTrans(start, newNodeA, EMPTY);
	  createTrans(newNodeB, newNodeA, EMPTY);
	  createTrans(newNodeA, last, EMPTY);

	  nfaIter(st, leaf->eleft, newNodeA, newNodeB);
	}
	break;

  case e_pclosure:
	{
	  int newNodeA, newNodeB;
	  newNodeA = getNewNode();
	  newNodeB = getNewNode();
	  
	  createTrans(start, newNodeA, EMPTY);
	  createTrans(newNodeB, newNodeA, EMPTY);
	  createTrans(newNodeB, last, EMPTY);

	  nfaIter(st, leaf->eleft, newNodeA, newNodeB);
	}
	break;

  case e_char:
	createTrans(start, last, leaf->ech);
	break;

  case e_empty:
	createTrans(start, last, EMPTY);
	break;
  }
}

void NfaClass::createTrans(int from, int to, char ch)
{
  Ntrans* nt = new Ntrans();
  nt->ch = ch;
  nt->to = to;
  nt->from = from;
  ntrans[ntransNum++] = nt;
}

void NfaClass::setTransToNode()
{
  for (int i=0; i<ntransNum; i++) {
	int from = ntrans[i]->from;
	if (nnode[from] != NULL) { // create new Nnode, and set to next link of nnode[i].

	  Nnode* newNode = new Nnode();
	  newNode->ch = ntrans[i]->ch;
	  newNode->to = ntrans[i]->to;
	  newNode->next = nnode[from];
	  nnode[from] = newNode;
	} else { // create new nnode[i]

	  nnode[from] = new Nnode();
	  nnode[from]->ch = ntrans[i]->ch;
	  nnode[from]->to = ntrans[i]->to;
	  nnode[from]->next = NULL;

	  if (from > nnodeMaxNum) nnodeMaxNum = from;
	}
  }
}

void NfaClass::nfaInitial()
{
  nodeNum = 1;
  ntransNum = 0;
  nnodeMaxNum = 0;

  for (int i=0; i<NTRANS_MAX; i++)
	ntrans[i] = NULL;

  for (int j=0; j<NNODE_MAX; j++)
	nnode[j] = NULL;
}

NfaClass::~NfaClass()
{
}

void NfaClass::showNtransAll() {
  for (int i=0; i<ntransNum; i++) {
	Ntrans* nt = ntrans[i];
	cout << "from:" << nt->from << " "
		 << "to:" << nt->to << " ";
	if (nt->ch == EMPTY)
	  cout << "char: EMPTY" << endl;
	else
	  cout << "char: " << nt->ch << endl;
  }
}

void NfaClass::showNnodeAll() {
  for (int i=0; i <= nnodeMaxNum; i++) {

	Nnode* nd = nnode[i];
	if (nd == NULL) continue;

	do {
	  cout << "from:" << i << " ";
	  cout << "to: " << nd->to << " ";
	  if (nd->ch == EMPTY)
		cout << "char: EMPTY" << endl;
	  else
		cout << "char: " << nd->ch << endl;

	  nd = nd->next;
	} while (nd != NULL);
  }
}

dfa.h

#ifndef _DFA_INCLUDE_CHECK
#define _DFA_INCLUDE_CHECK

#include <iostream>
#include "nfa.h"

#define nd_sib e.sibling
#define tr_ch e.transition.ch
#define tr_sib e.transition.sibling
#define tr_next e.transition.next

const int DFA_MAX = 128;

enum dfaOp_t {
  e_node,
  e_transition,
  e_final
};

struct Dtrans {
  int nnode_no;
  Dtrans* next;
};

struct Dnode {
  dfaOp_t op; // op_t represent the two statement as node or transition.
  union Entity {
	Dnode* sibling; // when op is "node"
	struct { // when op is "transition"
	  char ch;
	  Dnode* sibling; // If other charcter exist in this node, it is linked at sibling.
	  Dnode* next; // 
	} transition;
  } e;
};

class DfaClass {
  Dtrans* dtrans[DFA_MAX];
  Dnode* dnode[DFA_MAX];
  int numOfDnode;

  void dfaInitial(NfaClass& nfa);
  void nfaToDfa(NfaClass& nfa);
  void setEmptyFromNnode(NfaClass& nfa, Nnode* node, Dtrans** dtrans);
  int compareDtrans(Dtrans* dtrans_tmp, int checkNum);
  int getLinkSize(Dtrans* dtrans);
  bool isFinal(int dfastate);
  void showIter(Dnode* dn);

public:
  DfaClass(NfaClass& nfa) {
	dfaInitial(nfa);
	nfaToDfa(nfa);
  }
  
  ~DfaClass() {}
  void showDfaAll();
  bool regexp(char* search);
};

#endif

dfa.cpp

#include "dfa.h"

void DfaClass::dfaInitial(NfaClass& nfa)
{
  // make the DFA state of number 0
  dnode[0] = new Dnode();
  dtrans[0] = new Dtrans();

  dnode[0]->op = e_node;
  dnode[0]->nd_sib = NULL;

  dtrans[0]->nnode_no = 0;
  dtrans[0]->next = NULL;
  Nnode* node = nfa.nnode[0];
  setEmptyFromNnode(nfa, node, &dtrans[0]);

  if (isFinal(0))
	dnode[0]->op = e_final;

  numOfDnode = 0;
}

void DfaClass::setEmptyFromNnode(NfaClass& nfa, Nnode* node, Dtrans** dtrans)
{
  if (node == NULL) return;
  
  for (Nnode* p=node; p != NULL; p=p->next) {
	if (p->ch == EMPTY) {
	  Dtrans* newTrans = new Dtrans();
	  newTrans->nnode_no = p->to;
	  newTrans->next = *dtrans;
	  *dtrans = newTrans;

	  /** Check if the nnode next to this node is EMPTY */
	  Nnode* nextNode = nfa.nnode[p->to];
	  setEmptyFromNnode(nfa, nextNode, dtrans);
	}
  }
}

void DfaClass::nfaToDfa(NfaClass& nfa)
{
  int numCheckingDnode = 0;

  while (numCheckingDnode <= numOfDnode) {


	for (Dtrans* dtOuter = dtrans[numCheckingDnode];
		 dtOuter != NULL;
		 dtOuter = dtOuter->next) {

	  int nnode_no = dtOuter->nnode_no;

	  // final point of nnode
	  if (nfa.nnode[nnode_no] == NULL) continue;

	  char ch = nfa.nnode[nnode_no]->ch;
	  if (ch == EMPTY) continue;

	  int next_nnode_no = nfa.nnode[nnode_no]->to;
	  Dtrans* dtrans_tmp = new Dtrans();
	  dtrans_tmp->nnode_no = next_nnode_no;
	  dtrans_tmp->next = NULL;

	  // If the next of this nnode[] is EMPTY,
	  // add the EMPTY state to dtrans_tmp.
	  setEmptyFromNnode(nfa, nfa.nnode[next_nnode_no], &dtrans_tmp);

	  // ch != EMPTY. Search same ch at next dtranses.
	  for (Dtrans* dtInner = dtOuter->next;
		   dtInner != NULL;
		   dtInner = dtInner->next) {

		int noNext = dtInner->nnode_no;
		char chNext = nfa.nnode[noNext]->ch;
		if (chNext != ch) continue;

		int next_noNext = nfa.nnode[noNext]->to;
		Dtrans* newTransNext = new Dtrans();
		newTransNext->nnode_no = next_noNext;
		newTransNext->next = dtrans_tmp;
		dtrans_tmp = newTransNext;

		// If the next of this nnode[] is EMPTY,
		// add the EMPTY state to dtrans_tmp.
		setEmptyFromNnode(nfa, nfa.nnode[next_noNext], &dtrans_tmp);
	  }

	  /** Now, not empty dtrans_tmp exist.
	   *  We compare this dtrans_tmp to dtrans[x] as x <= numOfDnode
	   */
	  int needMakeNew = compareDtrans(dtrans_tmp, numOfDnode);
	  if (needMakeNew != -1) { // The same state already exist as dtrans_tmp.

		Dnode* newDnode = new Dnode();
		newDnode->op = e_transition;
		newDnode->tr_ch = ch;
		newDnode->tr_next = dnode[needMakeNew];
		newDnode->tr_sib = dnode[numCheckingDnode]->nd_sib;
		dnode[numCheckingDnode]->nd_sib = newDnode;
	  } else { // not exist. and make new state

		numOfDnode++;
		dtrans[numOfDnode] = dtrans_tmp;
		dnode[numOfDnode] = new Dnode();
		dnode[numOfDnode]->op = e_node;
		dnode[numOfDnode]->nd_sib = NULL;

		Dnode* newDnode = new Dnode();
		newDnode->op = e_transition;
		newDnode->tr_ch = ch;
		newDnode->tr_next = dnode[numOfDnode];
		newDnode->tr_sib = dnode[numCheckingDnode]->nd_sib;
		dnode[numCheckingDnode]->nd_sib = newDnode;

		if (isFinal(numOfDnode))
		  dnode[numOfDnode]->op = e_final;
	  }
	}
	
	numCheckingDnode++;
  }
}

/**
 * If a dtrans exist the same dtrans_tmp, return the state number
 * else return -1
 */
int DfaClass::compareDtrans(Dtrans* dtrans_tmp, int checkNum)
{
  const int NOT_EQUAL = 0;
  const int EQUAL = 1;
  int compFlag = EQUAL;

  int count;
  for (count = 0; count <= checkNum; count++) {
	Dtrans* dtp = dtrans_tmp;
	Dtrans* dti  = dtrans[count];
	
	if (getLinkSize(dtp) != getLinkSize(dti))
	  continue;

	for (; dtp != NULL && dti != NULL ; dtp = dtp->next, dti = dti->next) {
	  if ( dtp->nnode_no == dti->nnode_no ) continue;
	  else {
		compFlag = NOT_EQUAL;
		break;
	  }
	}

	if (compFlag == NOT_EQUAL) {
	  compFlag = EQUAL;
	  continue;
	}

	return count;
  }

  return -1;
}

int DfaClass::getLinkSize(Dtrans* dtrans)
{
  int size = 0;
  for (Dtrans* p = dtrans; p != NULL; p = p->next)
	size++;

  return size;
}

bool DfaClass::isFinal(int dfastate)
{
  Dtrans* p = dtrans[dfastate];
  for (; p != NULL; p = p->next)
	if (p->nnode_no == 1)  return true;

  return false;
}

bool DfaClass::regexp(char* search)
{
  char* p = search;
  Dnode* node = dnode[0];
  Dnode* prev = node;
  bool checkFlag = false;

  while (*p != '\0') {

	if (node == NULL) return false;

	Dnode* sib = node->e.sibling;
	for (; sib != NULL; sib = sib->tr_sib) {
	  
	  if (sib->tr_ch != *p) continue;

	  checkFlag = true;
	  break;
	}
	if (checkFlag == false) return false;
	
	checkFlag = false;
	
	node = sib->tr_next;
	prev = node;
	p++;
  }

  if (prev->op != e_final) return false;

  return true;
}

void DfaClass::showDfaAll()
{
  for (int i=0; i<=numOfDnode; i++) {
	Dnode* dn = dnode[i];
	cout << "node:" << i << " ";
	showIter(dn->nd_sib);
	cout << endl;
  }
}

void DfaClass::showIter(Dnode* dn)
{
  Dnode* p=dn;
  for (; p != NULL; p = p->tr_sib)
	cout << "char:" << p->tr_ch << " ";
}

main.cpp

#include "regex.h"
#include "nfa.h"
#include "dfa.h"

int main(int argc, char **argv)
{
  if (argc != 3) {
	cout << "Usage: Stree (regex string) (checking string)" << endl;
	exit(0);
  }
  char *regex = new char[strlen(argv[1])];
  char *check = new char[strlen(argv[2])];
  strncpy(regex, argv[1], strlen(argv[1]));
  strncpy(check, argv[2], strlen(argv[2]));

  Stree st(regex);
  NfaClass nfa(st);
  DfaClass dfa(nfa);

  nfa.showNtransAll();
  nfa.showNnodeAll();
  dfa.showDfaAll();

  cout << boolalpha;
  cout << "The result of checking this regular expression is '"
	   << dfa.regexp(check) << "'" << endl;

  delete regex;
  delete check;

  return 0;
}
最終更新:2007年04月16日 01:54