#ifndef __YOOLEX_HH__ #define __YOOLEX_HH__ /////////////////////////////////////////////////////////////////////////// // // $Id: yoolex.hh,v 1.8 2003/01/19 21:33:57 coconut Exp $ // // YooLex is the base class of the generated lexical scanner classes. // This file also contain another container class yoolex::PseudoVector // that doesn't have actual storage allocation. It is used to speed // things up when necessary. // // Copyright (c) 2001-2002, Heng Yuan // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), // to deal in the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: // // The above copyright notice and this permission notice shall be included // in all copies or substantial portions of the Software. // // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER // DEALINGS IN THE SOFTWARE. 
//
///////////////////////////////////////////////////////////////////////////

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <iterator>
#include <stack>
#include <string>
#include <vector>

//
// copy_n was an SGI extension that gcc 3.x moved into __gnu_cxx.  Generated
// scanners may still spell it std::copy_n, so it is imported into std for
// pre-C++11 builds only.  From C++11 on the library provides its own
// std::copy_n and importing the extension would make every call ambiguous.
//
#if defined(__GNUC__) && (__GNUC__ > 2) && (__cplusplus < 201103L)
#include <ext/algorithm>
namespace std { using __gnu_cxx::copy_n; }
#endif

#ifdef YYDEBUG
#ifndef YYLEXDEBUG
#define YYLEXDEBUG
#endif // YYLEXDEBUG
#endif // YYDEBUG

#ifdef YYLEXDEBUG
#define YYLEXMSG(s) std::cerr << __FUNCTION__ << ": " << s << std::endl
#else // YYLEXDEBUG
#define YYLEXMSG(s)
#endif // YYLEXDEBUG

namespace yoogroup
{

//
// This class is a pseudo container: it owns no storage and merely refers
// to a [start, end) range inside a std::vector<char> owned by somebody
// else.  Thus, one should be very cautious not to go over the boundary,
// and must not use it after the underlying vector has been modified.
//
// Not all container functions are provided since this one cannot
// be expanded.
//
class PseudoVector
{
private:
    /////////////////////////////////////////////////////////
    //
    //  Private Variables
    //
    /////////////////////////////////////////////////////////

    std::vector<char>::iterator _start;     // start point of the array
    std::vector<char>::iterator _end;       // end point of the array

public:
    /////////////////////////////////////////////////////////
    //
    //  Container Type Definitions
    //
    /////////////////////////////////////////////////////////

    typedef std::vector<char>::value_type       value_type;
    typedef std::vector<char>::size_type        size_type;
    typedef std::vector<char>::reference        reference;
    typedef std::vector<char>::const_reference  const_reference;
    typedef std::vector<char>::difference_type  difference_type;
    typedef std::vector<char>::pointer          pointer;
    typedef std::vector<char>::const_pointer    const_pointer;
    typedef std::vector<char>::iterator         iterator;
    typedef std::vector<char>::const_iterator   const_iterator;

#ifndef _RWSTD_NO_CLASS_PARTIAL_SPEC
    typedef std::reverse_iterator<iterator>         reverse_iterator;
    typedef std::reverse_iterator<const_iterator>   const_reverse_iterator;
#else
    // NOTE(review): Rogue Wave form for compilers without partial
    // specialization; argument list reconstructed, confirm against the
    // targeted library.
    typedef std::reverse_iterator<const_iterator,
                                  std::random_access_iterator_tag,
                                  value_type, const_reference,
                                  const_pointer, difference_type>
                                                    const_reverse_iterator;
    typedef std::reverse_iterator<iterator,
                                  std::random_access_iterator_tag,
                                  value_type, reference,
                                  pointer, difference_type>
                                                    reverse_iterator;
#endif

    /////////////////////////////////////////////////////////
    //
    //  Public Functions
    //
    /////////////////////////////////////////////////////////

    //
    // An empty range.  The iterators are value-initialized rather than
    // built from the literal 0, which only works when the iterator happens
    // to be constructible from a null pointer.
    //
    PseudoVector () : _start (), _end () { }

    // Refer to the range [start, end); no characters are copied.
    PseudoVector (iterator start, iterator end) : _start (start), _end (end) { }

    // Re-point this object at the range [start, end).
    void assign (iterator start, iterator end)
    {
        _start = start;
        _end = end;
    }

    //
    // The entire purpose of this container is to provide the following
    // inline functions that don't do much at all :)
    //
    iterator begin () { return _start; }
    const_iterator begin () const { return _start; }
    iterator end () { return _end; }
    const_iterator end () const { return _end; }

    reverse_iterator rbegin () { return reverse_iterator (end ()); }
    const_reverse_iterator rbegin () const { return const_reverse_iterator (end ()); }
    reverse_iterator rend () { return reverse_iterator (begin ()); }
    const_reverse_iterator rend () const { return const_reverse_iterator (begin ()); }

    // Unchecked element access; n must be less than size ().
    reference operator[] (size_type n) { return *(begin () + n); }
    const_reference operator[] (size_type n) const { return *(begin () + n); }

    // front () and back () require a non-empty range.
    reference front () { return *begin (); }
    const_reference front () const { return *begin (); }
    reference back () { return *(_end - 1); }
    const_reference back () const { return *(_end - 1); }

    bool empty () const { return begin () == end (); }
    size_type size () const { return _end - _start; }
    size_type max_size () const { return size (); }

    //
    // dummy clear functions: both simply make the range empty, whatever
    // sub-range was passed to erase.
    //
    void erase (iterator /* first */, iterator /* last */) { _start = _end; }
    void clear () { _start = _end; }

    //
    // for resize, can only shrink.  Used to handle trail context
    // No size checking is done.
    //
    void resize (size_type newsize) { _end = _start + newsize; }

    //
    // printing function for convenience.  Being a member, it is invoked
    // as "text << os".  Nothing is written for an empty range, so the
    // start iterator is never dereferenced when it points at nothing.
    //
    std::ostream& operator<< (std::ostream& os)
    {
        if (!empty ())
            os.write (&*_start, static_cast<std::streamsize> (size ()));
        return os;
    }

    // type conversions; each one copies the referenced characters
    operator std::string () const { return std::string (_start, _end); }
    operator std::vector<char> () const { return std::vector<char> (_start, _end); }
};

//
// This is a generic printing function to overcome the
// problem printing with a vector array.  It writes array.size ()
// characters starting at array.begin (); the container must therefore be
// contiguous.  An empty container writes nothing (begin () is not
// dereferenced).
//
// NOTE(review): the template is unconstrained, so inside this namespace it
// competes with the standard inserters for every type.
//
template <class T>
std::ostream&
operator<< (std::ostream& os, const T& array)
{
    if (array.size () != 0)
        os.write (&(array.begin ())[0],
                  static_cast<std::streamsize> (array.size ()));
    return os;
}

//
// There are three types of _yyText possible, each has an advantage.
//
// std::string is useful for text scanners.  std::vector<char> is
// useful for binary scanners.  For scanners that needs fast speed
// though, yoogroup::PseudoVector is preferred.  Keep in mind that
// one could always cast yoogroup::PseudoVector into std::string or
// std::vector<char> whenever necessary
//

//
// YooLex is the base class for all generated scanner classes
// Not all features are available to the base class due to the
// fact that the derived class may be optimized for line input,
// binary input, or speed.
//
// YYTEXT_TYPE is the container type used for _yyText and YYS_TYPE is the
// value type returned by yyLex ().
//
// NOTE(review): the object holds an iterator into its own buffer and a raw
// owning pointer, and defines no copy operations; do not copy scanners.
// NOTE(review): template parameter list reconstructed from usage; confirm
// whether the original declared default arguments.
//
template <class YYTEXT_TYPE, class YYS_TYPE>
class YooLex
{
public:
    /////////////////////////////////////////////////////////
    //
    //  Public Data Types
    //
    /////////////////////////////////////////////////////////

    typedef YYTEXT_TYPE YYTextType;
    typedef YYS_TYPE    YYSType;

protected:
    /////////////////////////////////////////////////////////
    //
    //  Internal Working Variables, Don't touch them!
    //
    /////////////////////////////////////////////////////////

    bool _yyReturnValue;                    // check if yyLexCase returned a value

    typedef std::vector<char> YYInternalBuffer;
    YYInternalBuffer _yyInternalBuffer;     // current character buffer
    YYInternalBuffer::iterator _yyLastMatchStart;   // the start position of the last match

private:
    /////////////////////////////////////////////////////////
    //
    //  Private Variables
    //
    /////////////////////////////////////////////////////////

    // saved base states; allocated on the first yyPushState ()
    std::stack<size_t> *_yyLexStateStack;

protected:
    /////////////////////////////////////////////////////////
    //
    //  Protected Variables
    //
    /////////////////////////////////////////////////////////

    size_t          _yyBaseState;       // the current base state
    std::istream    *_yyIS;             // the input stream
    std::ostream    *_yyOS;             // the output stream, only used as default output
    bool            _yyIsBOL;           // is at beginning of the line?
    bool            _yyIsNextBOL;       // is the next yylex pattern at BOL?
    bool            _yyBlockMode;       // is the stream block buffered or line buffered
    size_t          _yyCharNum;         // the current char count
    size_t          _yyLineNum;         // the current line number
    size_t          _yyReadBufferSize;  // the number of characters to try to read at one time

    /////////////////////////////////////////////////////////
    //
    //  Internal functions
    //
    /////////////////////////////////////////////////////////

    //
    // std::getline routines will strip off the NL character and that
    // make it hard to distinguish EOL and EOF that the scanner must
    // handle differently.
    //
    // This specialized version preserve the delimiter, but does
    // not add the terminating NULL character.  It does not set any
    // fail/eof bit either.
    //
    // str must have room for yyReadBufferSize () characters.
    // returns the # of bytes read.
    //
    std::streamsize yyGetLine (char *str);

    //
    // used to refill the input buffer from the current input stream.
    // Returns false if no more input is left after calling yyWrap.
    //
    bool yyRefreshBuffer ();

    //
    // used to notify that the current input stream is empty.
    // return true if no more input, false otherwise.
    // overload this function to switch input buffer upon EOF
    //
    virtual bool yyWrap ();

    //
    // The following function only appears in the derived class.
    //
    // // process the matched string.  this is a generated function
    // //
    // YYSType yyLexCase (int yySwitch);
    //

public:
    /////////////////////////////////////////////////////////
    //
    //  Public Variables
    //
    /////////////////////////////////////////////////////////

    YYTEXT_TYPE _yyText;        // declare yytext

    /////////////////////////////////////////////////////////
    //
    //  Public Functions
    //
    /////////////////////////////////////////////////////////

    //
    // constructor: reads std::cin and writes std::cout by default, block
    // mode, 8192 characters per read.
    //
    YooLex ()
        : _yyReturnValue (false),
          _yyLexStateStack (NULL),
          _yyBaseState (0),
          _yyIS (&std::cin),
          _yyOS (&std::cout),
          _yyIsBOL (true),
          _yyIsNextBOL (true),
          _yyBlockMode (true),
          _yyCharNum (0),
          _yyLineNum (0),
          _yyReadBufferSize (8192)
    {
        _yyLastMatchStart = _yyInternalBuffer.begin ();
    }

    virtual ~YooLex ();         // destructor

    //
    // set a different begin state
    //
    void yyBegin (int state) { _yyBaseState = state; }

    //
    // save the current base state and switch to the given one
    //
    void yyPushState (int state)
    {
        if (!_yyLexStateStack)
            _yyLexStateStack = new std::stack<size_t> ();
        _yyLexStateStack->push (_yyBaseState);
        yyBegin (state);
    }

    //
    // the most recently saved state.  If nothing has been pushed, the
    // current base state is returned instead of touching a missing or
    // empty stack.
    //
    int yyTopState ()
    {
        if (!_yyLexStateStack || _yyLexStateStack->empty ())
            return static_cast<int> (_yyBaseState);
        return static_cast<int> (_yyLexStateStack->top ());
    }

    //
    // restore the most recently saved state.  Does nothing if there is
    // no saved state.
    //
    void yyPopState ()
    {
        if (!_yyLexStateStack || _yyLexStateStack->empty ())
            return;
        _yyBaseState = _yyLexStateStack->top ();
        _yyLexStateStack->pop ();
    }

    //
    // set I/O stream.  Only the address is stored, so the stream must
    // outlive its use by the scanner.
    //
    void yySetInput (std::istream& is) { _yyIS = &is; }
    void yySetOutput (std::ostream& os) { _yyOS = &os; }

    //
    // get/set the amount to try to read each time.  The size must not
    // be zero: the refill code indexes the first slot of the read area.
    //
    void yySetReadBufferSize (size_t size) { _yyReadBufferSize = size; }
    size_t yyReadBufferSize () const { return _yyReadBufferSize; }

    //
    // get/set block mode (true: read blocks, false: read line by line)
    //
    void yySetBlockMode (bool blockMode) { _yyBlockMode = blockMode; }
    bool yyBlockMode () const { return _yyBlockMode; }

    //
    // the main lex function
    //
    // if this function is called often and polymorphism is not used to
    // switch among different lexers, comment out this function
    //
    virtual YYSType yyLex () = 0;

    /////////////////////////////////////////////////////////
    //
    //  Public Utility Functions
    //
    /////////////////////////////////////////////////////////

    //
    // yyIsBOL and yySetBOL are supported only for scanners that contain
    // BOL states or with "bol" or "line" option is turned on
    //
    bool yyIsBOL () const { return _yyIsBOL; }

    //
    // yySetBOL force the next pattern to be scanned as if it is at the
    // BOL.
    //
    void yySetBOL () { _yyIsNextBOL = true; }

    //
    // yyGetCharNum is supported only if "char" option is turned on
    //
    size_t yyGetCharNum () const { return _yyCharNum; }

    //
    // yyGetLineNum is supported only if "line" option is turned on
    //
    size_t yyGetLineNum () const { return _yyLineNum; }

    //
    // The use of the following functions are strongly discouraged
    // since they invalidate _yyText's pointers if PseudoVector is
    // used as _yyText type.  Use them only when _yyText is no longer
    // needed.  One could of course use the casting operators to
    // generate a copy of _yyText before doing the following.
    //

    //
    // unput a char or count# of chars back to re-scan
    //
    void yyUnput (char c);
    void yyUnput (const char *start, size_t count);

    //
    // For the following function to work, the unput size must not
    // exceed the length of yytext.size ().  No checking was done
    // Use this version instead of the above for speed.
    //
    // just rewind the lookahead pointer a little.
    //
    void yyUnput (int size) { _yyLastMatchStart -= size; }

    //
    // Read the next character from the input.  Returns EOF if can not read
    // the next character; otherwise the character as an unsigned char
    // value, so that no input byte can be mistaken for EOF.
    //
    int yyInput ();

    //
    // Read a block of characters from the input.  Returns an iterator to
    // where the read ended.  If the return value is not equal to end
    // then the read was short.
    //
    template <class ForwardIter>
    ForwardIter yyInput (ForwardIter begin, ForwardIter end);
};

/////////////////////////////////////////////////////////
//
//  Non-inline member functions
//
/////////////////////////////////////////////////////////

//
// Releases the state stack, if yyPushState () ever created one.
//
template <class YYTEXT_TYPE, class YYS_TYPE>
YooLex<YYTEXT_TYPE, YYS_TYPE>::~YooLex ()
{
    delete _yyLexStateStack;
}

//
// Reads up to yyReadBufferSize () characters from the input stream's
// buffer into str, stopping after a newline (which is stored) or at end of
// input.  Returns the number of characters stored; no terminator is added.
//
template <class YYTEXT_TYPE, class YYS_TYPE>
std::streamsize
YooLex<YYTEXT_TYPE, YYS_TYPE>::yyGetLine (char *str)
{
    std::istream& is = *_yyIS;
    if (is.eof ())
        return 0;

    std::streamsize count = yyReadBufferSize ();
    std::streamsize readCount = 0;
    // read through the streambuf directly so no stream state bits are set
    std::streambuf *sb = is.rdbuf ();

    while (true)
    {
        int ch = sb->sbumpc ();
        if (ch == EOF)
            return readCount;
        ++readCount;
        *str++ = static_cast<char> (ch);
        if (static_cast<char> (ch) == '\n' || --count <= 0)
            return readCount;
    }
}

//
// Moves the unscanned tail of the buffer to the front, then appends up to
// yyReadBufferSize () fresh characters (a block or a line, depending on
// the mode).  When the stream is exhausted yyWrap () is consulted: if it
// returns false the read is retried (presumably on a new stream installed
// by the override); if it returns true the function returns false with
// only the old tail left in the buffer.
//
template <class YYTEXT_TYPE, class YYS_TYPE>
bool
YooLex<YYTEXT_TYPE, YYS_TYPE>::yyRefreshBuffer ()
{
    const size_t oldSize =
        static_cast<size_t> (_yyInternalBuffer.end () - _yyLastMatchStart);

    // destination starts at or before the source, so std::copy is safe
    std::copy (_yyLastMatchStart, _yyInternalBuffer.end (),
               _yyInternalBuffer.begin ());
    _yyInternalBuffer.resize (oldSize + yyReadBufferSize ());

    while (true)
    {
        std::streamsize count;
        if (_yyBlockMode)
            count = _yyIS->rdbuf ()->sgetn (
                        &(_yyInternalBuffer[oldSize]),
                        static_cast<std::streamsize> (yyReadBufferSize ()));
        else
            count = yyGetLine (&(_yyInternalBuffer[oldSize]));

        if (count != 0)
        {
            // trim the unused part of the read area
            _yyInternalBuffer.resize (oldSize + count);
            _yyLastMatchStart = _yyInternalBuffer.begin ();
            return true;
        }

        _yyIS->setstate (std::ios::failbit | std::ios::eofbit);
        if (yyWrap ())
        {
            _yyInternalBuffer.resize (oldSize);
            _yyLastMatchStart = _yyInternalBuffer.begin ();
            return false;
        }
    }
}

//
// Default end-of-input handler: there is no further input.
//
template <class YYTEXT_TYPE, class YYS_TYPE>
bool
YooLex<YYTEXT_TYPE, YYS_TYPE>::yyWrap ()
{
    return true;
}

//
// Pushes one character back so that it is the next one scanned.  The
// character overwrites the slot before the scan position when there is
// one; otherwise it is inserted at the front of the buffer.
//
template <class YYTEXT_TYPE, class YYS_TYPE>
void
YooLex<YYTEXT_TYPE, YYS_TYPE>::yyUnput (char c)
{
    if (_yyLastMatchStart > _yyInternalBuffer.begin ())
        *--_yyLastMatchStart = c;
    else
    {
        _yyInternalBuffer.insert (_yyInternalBuffer.begin (), c);
        _yyLastMatchStart = _yyInternalBuffer.begin ();
    }
}

//
// Pushes count characters starting at start back in front of the scan
// position, so that they are scanned next in their given order.
//
template <class YYTEXT_TYPE, class YYS_TYPE>
void
YooLex<YYTEXT_TYPE, YYS_TYPE>::yyUnput (const char *start, size_t count)
{
    const size_t index =
        static_cast<size_t> (_yyLastMatchStart - _yyInternalBuffer.begin ());

    if (index >= count)
    {
        // enough already-scanned room in front: overwrite it in place
        _yyLastMatchStart -= count;
        std::copy (start, start + count, _yyLastMatchStart);
    }
    else
    {
        // not enough room: keep only the unscanned tail, shift it right
        // by count and place the new characters at the very front
        const size_t oldLength = _yyInternalBuffer.size () - index;
        _yyInternalBuffer.resize (oldLength + count);
        std::copy_backward (_yyInternalBuffer.begin () + index,
                            _yyInternalBuffer.begin () + index + oldLength,
                            _yyInternalBuffer.end ());
        std::copy (start, start + count, _yyInternalBuffer.begin ());
        _yyLastMatchStart = _yyInternalBuffer.begin ();
    }
}

//
// Returns the next input character, refilling the buffer when needed, or
// EOF when no input is left.  The character goes through unsigned char so
// that a byte such as 0xFF is not returned as a negative value equal to
// EOF on platforms where char is signed.
//
template <class YYTEXT_TYPE, class YYS_TYPE>
int
YooLex<YYTEXT_TYPE, YYS_TYPE>::yyInput ()
{
    if (_yyLastMatchStart >= _yyInternalBuffer.end ())
    {
        if (! yyRefreshBuffer ())
            return EOF;
    }
    return static_cast<unsigned char> (*(_yyLastMatchStart++));
}

//
// Copies input characters into [begin, end), refilling the buffer as
// needed, and returns the position just past the last character stored.
// The result differs from end only when the input ran out.
//
template <class YYTEXT_TYPE, class YYS_TYPE>
template <class ForwardIter>
ForwardIter
YooLex<YYTEXT_TYPE, YYS_TYPE>::yyInput (ForwardIter begin, ForwardIter end)
{
    typedef typename std::iterator_traits<ForwardIter>::difference_type Diff;

    Diff todo = std::distance (begin, end);
    while (todo > 0)
    {
        if (_yyLastMatchStart == _yyInternalBuffer.end ())
        {
            if (! yyRefreshBuffer ())
                break;
        }
        const Diff available = static_cast<Diff> (
            std::distance (_yyLastMatchStart, _yyInternalBuffer.end ()));
        const Diff number = std::min (todo, available);
        std::copy (_yyLastMatchStart, _yyLastMatchStart + number, begin);
        _yyLastMatchStart += number;
        std::advance (begin, number);
        todo -= number;
    }
    return begin;
}

}   // namespace yoogroup

/////////////////////////////////////////////////////////
//
//  Modification Log
//
//  $Log: yoolex.hh,v $
//  Revision 1.8  2003/01/19 21:33:57  coconut
//  *** empty log message ***
//
//  Revision 1.44  2003/01/15 17:46:42  coconut
//  1. added yyTopState () function
//
//  Revision 1.43  2003/01/15 08:57:57  coconut
//  1. fixed line option bug
//  2. added char option for character counting
//  3. added yyPushState & yyPopState functions.  Need testing
//
//  Revision 1.42  2002/10/27 08:27:14  coconut
//  removed copy_n all together to use std::copy instead, just change
//  the parameter a little.
//
//  Revision 1.41  2002/10/17 17:27:37  rminsk
//  With gcc-3.2 copy_n is no longer in the std namespace since it was an SGI
//  extension to the standard.  Import copy_n from the __gnu_cxx namespace.
// // Revision 1.40 2002/10/04 01:03:29 rminsk // Change old style c casts to new style c++ casts in generated code. // // Revision 1.39 2002/10/03 01:02:42 rminsk // 1. Add option "baseclass" to derive the lexer from a different baseclass. // 2. Made block mode a member variable to get yyInput to work properly. // // Revision 1.38 2002/08/28 02:29:16 rminsk // 1. Fixed warning message in "DFARow::operator =" about not returning a value. // 2. First attempt at adding YYSType. In doing so added a new option yystype. // // Revision 1.37 2002/07/27 01:22:37 coconut // 1. removed yymain.cc which is nolonger necessary // 2. striped .cc/.hh/.l/.y header since Id tag provides the info as well // 3. changed the copyright message // // Revision 1.36 2002/07/06 00:14:35 coconut // 1. added some debug macro, but still need to add in the debug codes // 2. moved yyRefreshBuffer to base class // // Revision 1.35 2002/07/01 22:20:46 coconut // 1. added typedef for YYTextType back in // 2. some changes in compresss_max.cc for YooParse. // // Revision 1.34 2002/06/29 00:05:35 coconut // 1. set the naming convention of all instance variables to begin with _yy // and all functions to begin with yy. This way, all lexer functions // are uniquely identified. // 2. changed getline from static to non-static and rename it to yyGetLine. // // Revision 1.33 2002/03/27 20:41:05 rminsk // Fixed bug in yyinput for multiple characters. // // Revision 1.32 2002/03/27 07:04:44 rminsk // Removed extra ";" at the end of the namespace. // // Revision 1.31 2002/03/26 21:12:17 rminsk // Made the read buffer size a member of the yoolex base class. // // Revision 1.30 2002/03/26 04:10:33 rminsk // 1. Made some of the inline functions in yoolex.hh non-inline. These were // mainly virtual functions and the large unput functions. // 2. Added two versions of yyinput. The first version works like the standard // lex yyinput function. 
The second version will read multiple characters // from the input stream at one time. The second is a nested templated // function which may not work on some compilers. // 3. Since yyinput may also have to read from the buffer and reload the buffer // I made a new virtual function yyupdatebuffer that refills the buffer. This // virtual method is not called very often so very little overhead was added // to yylex. // 4. In the process of adding yyupdatebuffer a bug was discovered in that the // stream buffer was not getting reset when yywrap was called. After // yywrap was called the buffered I/O version was trying to read from the // old streambuf. // // Revision 1.29 2002/03/23 02:34:06 rminsk // Removed unused variable YooLex::_lookahead // // Revision 1.28 2002/03/22 07:33:35 rminsk // Moved yytestType to YooLex base class. // // Revision 1.27 2002/03/22 02:43:59 rminsk // Added operator[] to access individual characters PseudoVector. // // Revision 1.26 2002/02/20 03:21:17 rminsk // Instead of using a typedef of YYTEXT_TYPE to declare the text buffer type, // made the base class a template baseclass. // // Revision 1.25 2002/01/30 05:34:40 coconut // made compatible w/ g++ 3.0.3 // // Revision 1.24 2002/01/28 05:52:50 coconut // fix some bugs in yoolex.hh. // // Revision 1.23 2002/01/27 06:31:06 coconut // Change the way of specifying yytext container type a little // to reduce macros defines. Changed YYTEXT_DECL to YYTEXT_TYPE. // // Revision 1.22 2002/01/27 06:10:11 coconut // added setBOL function since it is now feasible doing so with little // trouble. // // Revision 1.21 2002/01/27 03:45:40 coconut // Added line buffering back in. Also added "linemode" option to // turn this feature on. // // Revision 1.20 2002/01/26 09:06:30 coconut // Finish commenting on unput functions // // Revision 1.19 2002/01/26 08:24:54 coconut // Add resize () to yoolex::PseudoVector to fix a trail context bug. // // Revision 1.18 2002/01/26 07:52:01 coconut // 1. 
Change the entire buffering scheme. The current implementation // use block buffering instead of line buffering to boost the performance // significantly. // 2. Changed yytext from std::string to a default yoolex::PseudoVector. // A macro YYTEXT_DECL can be modified to change to another container // such as std::vector or std::string. The default PseudoVector // saves YooLex making unnecessary copies over and over again. yoolex.hh // is revised signficantly to reflect the changes. // 3. Removed yoolex.cc since it is nolonger necessary. // 4. Removed "outfile" option and replaced it with "ccfile" and "hhfile" // to specify the output name directly. // 5. Added "bol" and "line" options to optionally check BOL status and // update line numbers. // // Revision 1.17 2002/01/22 06:07:24 coconut // 1. fixed a mysterious core dump bug related to ofstream (_osHeader). // The fix was basically changing all outendl to endl for outputing // w/ _osHeader. // 2. removed yycase from the base YooLex class and changed it to protected. // It is more nature since there is generally no way, other than yylex () // to call yycase w/ correct switch value // 3. add a macro #define yyleng yytext.size () // // Revision 1.16 2002/01/19 02:51:41 coconut // 1. changed _is and _os so that they are now pointers instead objects // 2. slightly improved the speed of yylex () function. // 3. added std::ios::sync_with_stdio (false); to yymain to improve // default io speed. At least it made a difference in g++ 2.95.3 // // Revision 1.15 2002/01/19 00:19:16 rminsk // 1. Changed getline to be a static member function to facilitate chnaging the // buffer type. // 2. Changed the buffer from std::string to the abstract type Buffer. // 3. Defined Buffer to be std::vector. // // Revision 1.14 2001/12/04 04:45:58 coconut // 1. Forgot to update another yycase to put class name when calling it. // 2. removed class _matchedState and use a local variable instead. 
// // Revision 1.13 2001/11/24 11:37:10 coconut // 1. changed how BOL is handled. The newer algorithm is similar to Flex // and can be slighlty faster than the old one YooLex had. The down // side is the much bigger DFA table. // 2. merged yylex and match functions to further reduce amount of // initiatitions and # of function calls. The derived class // constructor is now empty (); // 3. Added the yyless macro, and removed some macros no longer used // 4. Add setOutput and updated ECHO at the same time // 5. Some other minor changes in the C++ code generator. // 6. Fixed a bug that if no %% is specified at end of section 2, no DFA // table is produced. // 7. Fixed a bug in yycase that if no . is specified, the scanner may fall // into an endless loop. // // Revision 1.12 2001/11/14 02:56:43 coconut // 1. Finally implemented a good compression algorithm that is about as // good as flex's // 2. fixed a bug that if a few patterns are linked together using '|' // and one of them has trail context would cause a problem. // 3. Now there are three DFA table options: ecs, max and full. // // Revision 1.11 2001/10/31 21:39:27 coconut // 1. Added a poorly implemented compression algorithm :) // 2. Fixed a condition bug in the parser // 3. handles trail context individually rather than through a // centralized yylex function. // 4. handles EOF differently. Now <> without conditions // specified will only add that action to // 5. Renders yy_value table unnecessary. // 6. moved YooLex::match to generated classes. // 7. updated IMPLEMENTATION // 8. Found a bug in handling \^, but it is not fixed yet. // // Revision 1.10 2001/10/25 07:09:06 coconut // 1. Moved some macros from yoolex.hh into the generated C++ source file // and yoolex.cc. // 2. Some cosmetic changes to the generated C++ source and header files. // // Revision 1.9 2001/10/25 05:15:21 coconut // 1. Re-defined how C++ code generator works. Basicly, the new scheme // eliminated h1 and h2 header blocks. 
Instead, it by default generates // a header template that a user may modify and change. YooLex now has // an option that tells it not to override the header file if it exists. // // Revision 1.8 2001/10/22 14:08:23 coconut // remove an unused #undef // // Revision 1.7 2001/10/22 13:22:19 coconut // // 1. Removed two nouse configuration parameters. // 2. Tried to avoid the use of Constructor in the generated source to // give the programmer the greater freedom. // // Revision 1.6 2001/10/22 12:43:51 coconut // 1. Output file, class name, file extensions are now configurable // 2. Added support of other configuration options // 3. Custom class definition is now possible. The scanner generator // is now truly capable of multiple instance. // // Revision 1.5 2001/10/22 01:19:40 coconut // Moved updateBuffer function codes from yoolex.hh to yoolex.cc // // Revision 1.4 2001/10/21 23:23:15 coconut // // 1. Organized yoolex.hh to make it more structured. // 2. Now prints the statistics of the yoolex generated tables by default. // 3. Updated README with limitations // // Revision 1.3 2001/10/21 21:52:01 coconut // // Fixed unput bug and YYDOCASE bug. The trails should now be // handled correctly. Added another unput (int size) function // to speed up the operation in certain cases. // // Revision 1.2 2001/10/21 09:08:09 coconut // *** empty log message *** // // ///////////////////////////////////////////////////////// #endif // __YOOLEX_HH__