正規表現 R|S RS R* R+ のパーサ
#ifndef _REGEX_INCLUDE_CHECK #define _REGEX_INCLUDE_CHECK #include <iostream> using namespace std; #define eleft e.child.left #define eright e.child.right #define ech e.ch enum op_t { e_union, e_concat, e_closure, e_pclosure, e_empty, e_char }; struct Leaf { op_t op; /* * if op == "char", then the param of this union is character * else , then the param of this union is the struct that represents * a left tree and a right tree. */ union Entity { char ch; struct { Leaf *left; Leaf *right; } child; } e; }; class Stree { char current; char *inspectedStr, *baseStr; Leaf *root; void perror(const char * ptr); char getNextCurrent(); char getBackCurrent(); void setNextCurrent(); Leaf* growTree(Leaf* left, Leaf* right, op_t op); Leaf* regexp(); Leaf* regItr1(); Leaf* regItr2(); Leaf* regItr3(); void showItem(Leaf *leaf, int count); public: Stree(char *setch) { inspectedStr = setch; baseStr = setch; current = *(inspectedStr); root = regexp(); } Leaf* getRoot() { return root; } void showAll(); }; #endif
#include "regex.h" inline char Stree::getNextCurrent() { return *(inspectedStr+1); } inline char Stree::getBackCurrent() { if (baseStr < (inspectedStr-1)) return '\0'; return *(inspectedStr-1); } inline void Stree::setNextCurrent() { current = *(++inspectedStr); } /* * This function is mainly dealing with "union". */ Leaf* Stree::regexp() { Leaf *xLeaf, *yLeaf; xLeaf = regItr1(); if (current == '|') { setNextCurrent(); yLeaf = regexp(); xLeaf = growTree(xLeaf, yLeaf, e_union); } return xLeaf; } /* * This function is mainly dealing with "concat". */ Leaf* Stree::regItr1() { Leaf *xLeaf, *yLeaf; xLeaf = regItr2(); if (xLeaf == NULL) return NULL; if (current != '\0' && current != '|' && current != ')') { yLeaf = regItr1(); xLeaf = growTree(xLeaf, yLeaf, e_concat); } return xLeaf; } Leaf* Stree::regItr2() { Leaf *xLeaf; xLeaf = regItr3(); if (xLeaf == NULL) return NULL; if (current == '*') { xLeaf = growTree(xLeaf, NULL, e_closure); setNextCurrent(); } else if (current == '+') { xLeaf = growTree(xLeaf, NULL, e_pclosure); setNextCurrent(); } return xLeaf; } Leaf* Stree::regItr3() { Leaf *xLeaf = new Leaf(); xLeaf->eleft = NULL; xLeaf->eright = NULL; if (current == '(') { setNextCurrent(); xLeaf = regexp(); if (current != ')') perror("parse error"); else setNextCurrent(); } else if (isalpha(current)) { xLeaf->op = e_char; xLeaf->ech = current; setNextCurrent(); } else { if (current == '|') { xLeaf->op = e_empty; setNextCurrent(); } else if (getBackCurrent() == '|'){ xLeaf->op = e_empty; setNextCurrent(); } else if (current == '\0') { return NULL; } else { perror("sentence error"); } } return xLeaf; } Leaf* Stree::growTree(Leaf *left, Leaf *right, op_t op) { Leaf *leaf = new Leaf(); leaf->op = op; leaf->eleft = left; leaf->eright = right; return leaf; } void Stree::showAll() { showItem(root, 0); } void Stree::showItem(Leaf *leaf, int count) { if (leaf == NULL) return; if (leaf->op != e_char) { for (int i=0; i<=count; i++) cout << " "; cout << "left" << endl; showItem(leaf->eleft, count+1); } for (int i=0; i<=count; i++) cout << " "; switch (leaf->op) { case e_union: cout << '|'; break; case e_concat: break; case e_closure: cout << '*'; break; case e_pclosure: cout << '+'; break; case e_empty: cout << "empty" << endl;; return; case e_char: cout << leaf->ech << endl;; return; } cout << endl; for (int i=0; i<=count; i++) cout << " "; cout << "right" << endl; showItem(leaf->eright, count+1); } void Stree::perror(const char * ptr) { cout << ptr << endl; exit(1); }
#ifndef _NFA_INCLUDE_CHECK #define _NFA_INCLUDE_CHECK #include <iostream> #include "regex.h" const int NTRANS_MAX = 128; const int NNODE_MAX = 128; const char EMPTY = -1; const int END = -1; struct Ntrans { char ch; int to; int from; }; struct Nnode { char ch; int to; Nnode* next; }; class NfaClass { Ntrans* ntrans[NTRANS_MAX]; int nodeNum; int ntransNum; int nnodeMaxNum; void nfaInitial(); void nfaIter(Stree& st, Leaf* leaf, int start, int last); void createTrans(int from, int to, char ch); void setTransToNode(); int getNewNode() { return ++nodeNum; } public: Nnode* nnode[NNODE_MAX]; NfaClass(Stree& st) { nfaInitial(); Leaf* leaf = st.getRoot(); nfaIter(st, leaf, 0, 1); setTransToNode(); } ~NfaClass(); Nnode* getRoot(); void showNtransAll(); void showNnodeAll(); }; #endif
#include "nfa.h" void NfaClass::nfaIter(Stree& st, Leaf* leaf, int start, int last) { switch (leaf->op) { case e_union: { int newNodeA, newNodeB, newNodeC, newNodeD; newNodeA = getNewNode(); newNodeB = getNewNode(); newNodeC = getNewNode(); newNodeD = getNewNode(); createTrans(start, newNodeA, EMPTY); createTrans(newNodeB, last, EMPTY); createTrans(start, newNodeC, EMPTY); createTrans(newNodeD, last, EMPTY); nfaIter(st, leaf->eleft, newNodeA, newNodeB); nfaIter(st, leaf->eright, newNodeC, newNodeD); } break; case e_concat: { int newNode = getNewNode(); nfaIter(st, leaf->eleft, start, newNode); nfaIter(st, leaf->eright, newNode, last); } break; case e_closure: { int newNodeA, newNodeB; newNodeA = getNewNode(); newNodeB = getNewNode(); createTrans(start, newNodeA, EMPTY); createTrans(newNodeB, newNodeA, EMPTY); createTrans(newNodeA, last, EMPTY); nfaIter(st, leaf->eleft, newNodeA, newNodeB); } break; case e_pclosure: { int newNodeA, newNodeB; newNodeA = getNewNode(); newNodeB = getNewNode(); createTrans(start, newNodeA, EMPTY); createTrans(newNodeB, newNodeA, EMPTY); createTrans(newNodeB, last, EMPTY); nfaIter(st, leaf->eleft, newNodeA, newNodeB); } break; case e_char: createTrans(start, last, leaf->ech); break; case e_empty: createTrans(start, last, EMPTY); break; } } void NfaClass::createTrans(int from, int to, char ch) { Ntrans* nt = new Ntrans(); nt->ch = ch; nt->to = to; nt->from = from; ntrans[ntransNum++] = nt; } void NfaClass::setTransToNode() { for (int i=0; i<ntransNum; i++) { int from = ntrans[i]->from; if (nnode[from] != NULL) { // create new Nnode, and set to next link of nnode[i]. Nnode* newNode = new Nnode(); newNode->ch = ntrans[i]->ch; newNode->to = ntrans[i]->to; newNode->next = nnode[from]; nnode[from] = newNode; } else { // create new nnode[i] nnode[from] = new Nnode(); nnode[from]->ch = ntrans[i]->ch; nnode[from]->to = ntrans[i]->to; nnode[from]->next = NULL; if (from > nnodeMaxNum) nnodeMaxNum = from; } } } void NfaClass::nfaInitial() { nodeNum = 1; ntransNum = 0; nnodeMaxNum = 0; for (int i=0; i<NTRANS_MAX; i++) ntrans[i] = NULL; for (int j=0; j<NNODE_MAX; j++) nnode[j] = NULL; } NfaClass::~NfaClass() { } void NfaClass::showNtransAll() { for (int i=0; i<ntransNum; i++) { Ntrans* nt = ntrans[i]; cout << "from:" << nt->from << " " << "to:" << nt->to << " "; if (nt->ch == EMPTY) cout << "char: EMPTY" << endl; else cout << "char: " << nt->ch << endl; } } void NfaClass::showNnodeAll() { for (int i=0; i <= nnodeMaxNum; i++) { Nnode* nd = nnode[i]; if (nd == NULL) continue; do { cout << "from:" << i << " "; cout << "to: " << nd->to << " "; if (nd->ch == EMPTY) cout << "char: EMPTY" << endl; else cout << "char: " << nd->ch << endl; nd = nd->next; } while (nd != NULL); } }
#ifndef _DFA_INCLUDE_CHECK #define _DFA_INCLUDE_CHECK #include <iostream> #include "nfa.h" #define nd_sib e.sibling #define tr_ch e.transition.ch #define tr_sib e.transition.sibling #define tr_next e.transition.next const int DFA_MAX = 128; enum dfaOp_t { e_node, e_transition, e_final }; struct Dtrans { int nnode_no; Dtrans* next; }; struct Dnode { dfaOp_t op; // op_t represent the two statement as node or transition. union Entity { Dnode* sibling; // when op is "node" struct { // when op is "transition" char ch; Dnode* sibling; // If other charcter exist in this node, it is linked at sibling. Dnode* next; // } transition; } e; }; class DfaClass { Dtrans* dtrans[DFA_MAX]; Dnode* dnode[DFA_MAX]; int numOfDnode; void dfaInitial(NfaClass& nfa); void nfaToDfa(NfaClass& nfa); void setEmptyFromNnode(NfaClass& nfa, Nnode* node, Dtrans** dtrans); int compareDtrans(Dtrans* dtrans_tmp, int checkNum); int getLinkSize(Dtrans* dtrans); bool isFinal(int dfastate); void showIter(Dnode* dn); public: DfaClass(NfaClass& nfa) { dfaInitial(nfa); nfaToDfa(nfa); } ~DfaClass() {} void showDfaAll(); bool regexp(char* search); }; #endif
#include "dfa.h" void DfaClass::dfaInitial(NfaClass& nfa) { // make the DFA state of number 0 dnode[0] = new Dnode(); dtrans[0] = new Dtrans(); dnode[0]->op = e_node; dnode[0]->nd_sib = NULL; dtrans[0]->nnode_no = 0; dtrans[0]->next = NULL; Nnode* node = nfa.nnode[0]; setEmptyFromNnode(nfa, node, &dtrans[0]); if (isFinal(0)) dnode[0]->op = e_final; numOfDnode = 0; } void DfaClass::setEmptyFromNnode(NfaClass& nfa, Nnode* node, Dtrans** dtrans) { if (node == NULL) return; for (Nnode* p=node; p != NULL; p=p->next) { if (p->ch == EMPTY) { Dtrans* newTrans = new Dtrans(); newTrans->nnode_no = p->to; newTrans->next = *dtrans; *dtrans = newTrans; /** Check if the nnode next to this node is EMPTY */ Nnode* nextNode = nfa.nnode[p->to]; setEmptyFromNnode(nfa, nextNode, dtrans); } } } void DfaClass::nfaToDfa(NfaClass& nfa) { int numCheckingDnode = 0; while (numCheckingDnode <= numOfDnode) { for (Dtrans* dtOuter = dtrans[numCheckingDnode]; dtOuter != NULL; dtOuter = dtOuter->next) { int nnode_no = dtOuter->nnode_no; // final point of nnode if (nfa.nnode[nnode_no] == NULL) continue; char ch = nfa.nnode[nnode_no]->ch; if (ch == EMPTY) continue; int next_nnode_no = nfa.nnode[nnode_no]->to; Dtrans* dtrans_tmp = new Dtrans(); dtrans_tmp->nnode_no = next_nnode_no; dtrans_tmp->next = NULL; // If the next of this nnode[] is EMPTY, // add the EMPTY state to dtrans_tmp. setEmptyFromNnode(nfa, nfa.nnode[next_nnode_no], &dtrans_tmp); // ch != EMPTY. Search same ch at next dtranses. for (Dtrans* dtInner = dtOuter->next; dtInner != NULL; dtInner = dtInner->next) { int noNext = dtInner->nnode_no; char chNext = nfa.nnode[noNext]->ch; if (chNext != ch) continue; int next_noNext = nfa.nnode[noNext]->to; Dtrans* newTransNext = new Dtrans(); newTransNext->nnode_no = next_noNext; newTransNext->next = dtrans_tmp; dtrans_tmp = newTransNext; // If the next of this nnode[] is EMPTY, // add the EMPTY state to dtrans_tmp. setEmptyFromNnode(nfa, nfa.nnode[next_noNext], &dtrans_tmp); } /** Now, not empty dtrans_tmp exist. * We compare this dtrans_tmp to dtrans[x] as x <= numOfDnode */ int needMakeNew = compareDtrans(dtrans_tmp, numOfDnode); if (needMakeNew != -1) { // The same state already exist as dtrans_tmp. Dnode* newDnode = new Dnode(); newDnode->op = e_transition; newDnode->tr_ch = ch; newDnode->tr_next = dnode[needMakeNew]; newDnode->tr_sib = dnode[numCheckingDnode]->nd_sib; dnode[numCheckingDnode]->nd_sib = newDnode; } else { // not exist. and make new state numOfDnode++; dtrans[numOfDnode] = dtrans_tmp; dnode[numOfDnode] = new Dnode(); dnode[numOfDnode]->op = e_node; dnode[numOfDnode]->nd_sib = NULL; Dnode* newDnode = new Dnode(); newDnode->op = e_transition; newDnode->tr_ch = ch; newDnode->tr_next = dnode[numOfDnode]; newDnode->tr_sib = dnode[numCheckingDnode]->nd_sib; dnode[numCheckingDnode]->nd_sib = newDnode; if (isFinal(numOfDnode)) dnode[numOfDnode]->op = e_final; } } numCheckingDnode++; } } /** * If a dtrans exist the same dtrans_tmp, return the state number * else return -1 */ int DfaClass::compareDtrans(Dtrans* dtrans_tmp, int checkNum) { const int NOT_EQUAL = 0; const int EQUAL = 1; int compFlag = EQUAL; int count; for (count = 0; count <= checkNum; count++) { Dtrans* dtp = dtrans_tmp; Dtrans* dti = dtrans[count]; if (getLinkSize(dtp) != getLinkSize(dti)) continue; for (; dtp != NULL && dti != NULL ; dtp = dtp->next, dti = dti->next) { if ( dtp->nnode_no == dti->nnode_no ) continue; else { compFlag = NOT_EQUAL; break; } } if (compFlag == NOT_EQUAL) { compFlag = EQUAL; continue; } return count; } return -1; } int DfaClass::getLinkSize(Dtrans* dtrans) { int size = 0; for (Dtrans* p = dtrans; p != NULL; p = p->next) size++; return size; } bool DfaClass::isFinal(int dfastate) { Dtrans* p = dtrans[dfastate]; for (; p != NULL; p = p->next) if (p->nnode_no == 1) return true; return false; } bool DfaClass::regexp(char* search) { char* p = search; Dnode* node = dnode[0]; Dnode* prev = node; bool checkFlag = false; while (*p != '\0') { if (node == NULL) return false; Dnode* sib = node->e.sibling; for (; sib != NULL; sib = sib->tr_sib) { if (sib->tr_ch != *p) continue; checkFlag = true; break; } if (checkFlag == false) return false; checkFlag = false; node = sib->tr_next; prev = node; p++; } if (prev->op != e_final) return false; return true; } void DfaClass::showDfaAll() { for (int i=0; i<=numOfDnode; i++) { Dnode* dn = dnode[i]; cout << "node:" << i << " "; showIter(dn->nd_sib); cout << endl; } } void DfaClass::showIter(Dnode* dn) { Dnode* p=dn; for (; p != NULL; p = p->tr_sib) cout << "char:" << p->tr_ch << " "; }
#include "regex.h" #include "nfa.h" #include "dfa.h" int main(int argc, char **argv) { if (argc != 3) { cout << "Usage: Stree (regex string) (checking string)" << endl; exit(0); } char *regex = new char[strlen(argv[1])]; char *check = new char[strlen(argv[2])]; strncpy(regex, argv[1], strlen(argv[1])); strncpy(check, argv[2], strlen(argv[2])); Stree st(regex); NfaClass nfa(st); DfaClass dfa(nfa); nfa.showNtransAll(); nfa.showNnodeAll(); dfa.showDfaAll(); cout << boolalpha; cout << "The result of checking this regular expression is '" << dfa.regexp(check) << "'" << endl; delete regex; delete check; return 0; }