// parser.cc // Note: this code is butt-ugly, and should be rewritten from scratch. // nevertheless, it appears to work #include #include #include #include #include #include typedef string Set; typedef pair Occurance; template< class It > string columns( It a, It b, int n ) { string s = ""; for( It i=a; i!=b; i++ ) s += rpad( *i, n ); return s; } template vector::reference at( vector& v, vector::size_type i ) { assert( i>=0 && i='a') || c=='$'; } bool is_non_terminal( char c ) { return (c<='Z' && c>='A'); } bool contains( string s, char c ) { return find( s.begin(), s.end(), c )!=s.end(); } Set set_intersection( Set s, Set t ) { Set r = ""; for( string::size_type i=0; i" << rpad(p.rhs(),7); return o; } class Grammar { // basic state char _start_symbol; vector _rules; // these are fixed, we use their string _non_terminals; // indicies for other things typedef vector::iterator ProdIt; // state for the derives-lambda calculations vector symbol_derives_empty; // one for each non-terminal vector rule_derives_empty; // one for each production string worklist; vector count; // one for each production // Computation of lambda derivations, from Figure 4.7 of the textbook void derives_empty_string() { for( string::size_type i=0; i<_non_terminals.length(); i++ ) at(symbol_derives_empty,i) = false; int n=0; for( ProdIt i=_rules.begin(); i!=_rules.end(); i++, n++ ) { at(rule_derives_empty,n) = false; count_symbols(n); check_for_empty(n); } while( worklist.length() != 0 ) { char X = worklist[0]; worklist.erase( worklist.begin() ); vector o = occurances( X ); for( vector::iterator p=o.begin(); p!=o.end(); p++ ) { at(count,*p)--; check_for_empty( *p ); } } } void count_symbols( int p ) { at(count,p) = at(_rules,p).rhs().length(); } void check_for_empty( int p ) { if( at(count,p) == 0 ) { at(rule_derives_empty,p) = true; char A = at(_rules,p).lhs(); string::size_type ap = _non_terminals.find(A); assert( ap != string::npos ); if( !at(symbol_derives_empty,ap) ) { at(symbol_derives_empty,ap) = true; if( !contains( worklist, A ) ) worklist += A; } } } vector occurances( char c ) { // returns a list of all production numbers that have c on the // right hand side of the rule; includes duplicates vector result; int n=0; for( ProdIt i=_rules.begin(); i!=_rules.end(); i++, n++ ) { string s = i->rhs(); string::iterator j = find( s.begin(), s.end(), c ); while( j != s.end() ) { result.push_back(n); j = find( j+1, s.end(), c ); } } return result; } vector< Occurance > f_occurances( char c ) { // returns a list of all production numbers that have c on the // right hand side of the rule; includes duplicates vector< Occurance > result; int n=0; for( ProdIt i=_rules.begin(); i!=_rules.end(); i++, n++ ) { string s = i->rhs(); string::iterator j = find( s.begin(), s.end(), c ); while( j != s.end() ) { result.push_back( make_pair( n, string(j+1,s.end()) ) ); j = find( j+1, s.end(), c ); } } return result; } // for "first" computations vector visited_first; Set first( string s ) { for( string::size_type i=0; i<_non_terminals.length(); i++ ) visited_first.push_back( false ); return internal_first( s ); } Set internal_first( string s ) { //cout << "int_first a" << endl; if( s == "" ) return string(""); char X = s[0]; if( is_terminal(X) ) return string("")+X; string::size_type p = _non_terminals.find(X); //cout << "int_first b: p=" << p << " x=" << X << endl; assert( p != string::npos ); Set ans = ""; if( !at(visited_first,p) ) { at(visited_first,p) = true; vector v = productions(X); for( ProdIt i=v.begin(); i!=v.end(); i++ ) { ans = set_union( ans, internal_first( i->rhs() ) ); } } s.erase(s.begin()); // "beta" if( at(symbol_derives_empty,p) ) ans = set_union( ans, internal_first(s) ); //cout << "int_first h" << endl; return ans; } // for follow set computation vector visited_follow; Set internal_follow( char A ) { Set ans = ""; string::size_type n = _non_terminals.find(A); assert( n != string::npos ); if( !at(visited_follow,n) ) { at(visited_follow,n) = true; vector o = f_occurances(A); for( vector::iterator i=o.begin(); i!=o.end(); i++ ) { ans = set_union( ans, first( i->second ) ); if( all_derive_empty( i->second ) ) ans = set_union( ans, internal_follow(at(_rules,i->first).lhs()) ); } } return ans; } bool all_derive_empty( string s ) { for( string::size_type i=0; i > _predict; void compute_predict() { int n=0; for( ProdIt i=_rules.begin(); i!=_rules.end(); i++, n++ ) { Set ans = first( *i ); //cout << "first: " << rpad(ans,7); if( rule_derives_empty[n] ) { ans = set_union( ans, follow( i->lhs() ) ); //cout << " follow: " << rpad(ans,7); } _predict.push_back( make_pair( *i, ans ) ); //cout << " predict: " << rpad(ans,7) << endl; } } int prod_no( Production p ) { // production number int n=0; for( ProdIt i=_rules.begin(); i!=_rules.end(); i++, n++ ) if( p == *i ) return n; assert(0); return -1; // sate compiler } int nt_no( char A ) { // non-terminal number for( string::size_type i=0; i<_non_terminals.length(); i++ ) if( _non_terminals[i] == A ) return i; assert(0); return -1; // sate compiler } int t_no( char c ) { // terminal number string term = terminals(); for( string::size_type i=0; i >& v, char A, Set pred, int p ) { for( Set::size_type i=0; i rules ) : _start_symbol(ss) { for( ProdIt i=rules.begin(); i!=rules.end(); i++ ) { char c = i->lhs(); // DEBUG ATTEMPT // if( c == _start_symbol ) // i->_rhs += '$'; if( !contains( _non_terminals, c ) ) { _non_terminals += c; symbol_derives_empty.push_back(false); } count.push_back(0); rule_derives_empty.push_back(false); } _rules = rules; derives_empty_string(); compute_predict(); } char start_symbol() { return _start_symbol; } string non_terminals() { return _non_terminals; } vector productions( char c ) { assert( is_non_terminal(c) ); vector v; for( ProdIt i=_rules.begin(); i!=_rules.end(); i++ ) { if( i->lhs() == c ) v.push_back( *i ); } return v; } string first( Production p ) { visited_first.clear(); return first( p.rhs() ); } string follow( char A ) { visited_follow.clear(); for( string::size_type i=0; i<_non_terminals.length(); i++ ) visited_follow.push_back( false ); return internal_follow( A ); } vector< pair > predict() { return _predict; } vector< pair > predict( char A ) { vector< pair > v; vector< pair >::iterator i; for( i=_predict.begin(); i!=_predict.end(); i++ ) { if( i->first.lhs() == A ) v.push_back( *i ); } return v; } bool derives_empty( int n ) { return at(rule_derives_empty,n); } bool is_ll1() { string N = non_terminals(); for( string::size_type i=0; i > v = predict(A); vector< pair >::iterator j; for( j=v.begin(); j!=v.end(); j++ ) { if( set_intersection( j->second, pred_set ) != "" ) return false; pred_set = set_union( pred_set, j->second ); } } return true; } vector< vector > table() { // returns table so that table[nonterminal][terminal] -> rule number assert( is_ll1() ); // create the vector "structure" and fill with 0s int N = non_terminals().length(); int T = terminals().length(); vector< vector > v; vector temp; for( int i=0; i > pred = predict(A); vector< pair >::iterator j; for( j=pred.begin(); j!=pred.end(); j++ ) { int p = prod_no( j->first ); fill( v, j->first.lhs(), j->second, p ); } } return v; } Set terminals() { // compute list of terminals Set result = ""; for( ProdIt i=_rules.begin(); i!=_rules.end(); i++ ) { string rhs = i->rhs(); for( string::size_type j=0; j v; /* Production p( 'E', "PoEc" ); v.push_back( p ); v.push_back( Production( 'E', "vT" ) ); v.push_back( Production( 'P', "f" ) ); v.push_back( Production( 'P', "" ) ); v.push_back( Production( 'T', "pE" ) ); v.push_back( Production( 'T', "" ) ); Grammar g( 'E', v ); cout << "first of E->PoEc is " << g.first(p) << endl; cout << "follow of T is " << g.follow('E') << endl; */ /* v.push_back( Production( 'S', "AC" ) ); v.push_back( Production( 'C', "c" ) ); v.push_back( Production( 'C', "" ) ); v.push_back( Production( 'A', "aBCd" ) ); v.push_back( Production( 'A', "BQ" ) ); v.push_back( Production( 'B', "bB" ) ); v.push_back( Production( 'B', "" ) ); v.push_back( Production( 'Q', "q" ) ); v.push_back( Production( 'Q', "" ) ); Grammar g( 'S', v ); */ /* ch5, p2 */ /* v.push_back( Production( 'S', "Ab" ) ); v.push_back( Production( 'A', "a" ) ); v.push_back( Production( 'A', "B" ) ); v.push_back( Production( 'A', "" ) ); v.push_back( Production( 'B', "b" ) ); v.push_back( Production( 'B', "" ) ); Grammar g( 'S', v ); */ cout << "Pick a problem, 3-5: " << flush; int zzz; cin >> zzz; cout << endl; if( zzz==3 ) { /* ch5, p3 */ v.push_back( Production( 'S', "ABBA" ) ); v.push_back( Production( 'A', "a" ) ); v.push_back( Production( 'A', "" ) ); v.push_back( Production( 'B', "b" ) ); v.push_back( Production( 'B', "" ) ); v.push_back( Production( 'Z', "S$" ) ); } else if( zzz==4 ) { /* ch5, p4 */ v.push_back( Production( 'S', "aSe" ) ); v.push_back( Production( 'S', "B" ) ); v.push_back( Production( 'B', "bBe" ) ); v.push_back( Production( 'B', "C" ) ); v.push_back( Production( 'C', "cCe" ) ); v.push_back( Production( 'C', "d" ) ); v.push_back( Production( 'Z', "S$" ) ); } else { /* ch5, p5 */ v.push_back( Production( 'E', "mE" ) ); v.push_back( Production( 'E', "oEc" ) ); v.push_back( Production( 'E', "VF" ) ); v.push_back( Production( 'F', "mE" ) ); v.push_back( Production( 'F', "" ) ); v.push_back( Production( 'V', "iW" ) ); v.push_back( Production( 'W', "oEc" ) ); v.push_back( Production( 'W', "" ) ); v.push_back( Production( 'Z', "E$" ) ); } Grammar g( 'Z', v ); vector< pair > pr = g.predict(); int n=0; for( vector< pair >::iterator i=pr.begin(); i!=pr.end(); i++, n++ ) { cout << rpad(n+1,3) << i->first << " first: " << rpad(g.first(i->first),7) << " der em: " << g.derives_empty(n) << " follow: " << rpad(g.follow(i->first.lhs()),7) << " predict: " << i->second << endl; } cout << "The grammar " << (g.is_ll1()?"is":"is not") << " LL(1)." << endl; if( g.is_ll1() ) { vector< vector > table = g.table(); string term = g.terminals(); string non = g.non_terminals(); cout << endl << rpad("",3) << columns( term.begin(), term.end(), 3 ) << endl; for( string::size_type i=0; i