lexer.cpp

00001 // -*- c-basic-offset: 2 -*-
00002 /*
00003  *  This file is part of the KDE libraries
00004  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
00005  *
00006  *  This library is free software; you can redistribute it and/or
00007  *  modify it under the terms of the GNU Library General Public
00008  *  License as published by the Free Software Foundation; either
00009  *  version 2 of the License, or (at your option) any later version.
00010  *
00011  *  This library is distributed in the hope that it will be useful,
00012  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  *  Library General Public License for more details.
00015  *
00016  *  You should have received a copy of the GNU Library General Public License
00017  *  along with this library; see the file COPYING.LIB.  If not, write to
00018  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019  *  Boston, MA 02110-1301, USA.
00020  *
00021  */
00022 
00023 #ifdef HAVE_CONFIG_H
00024 #include <config.h>
00025 #endif
00026 
00027 #include <ctype.h>
00028 #include <stdlib.h>
00029 #include <stdio.h>
00030 #include <string.h>
00031 #include <assert.h>
00032 
00033 #include "value.h"
00034 #include "object.h"
00035 #include "types.h"
00036 #include "interpreter.h"
00037 #include "nodes.h"
00038 #include "lexer.h"
00039 #include "identifier.h"
00040 #include "lookup.h"
00041 #include "internal.h"
00042 #include "dtoa.h"
00043 
00044 // we can't specify the namespace in yacc's C output, so do it here
00045 using namespace KJS;
00046 
00047 static Lexer *currLexer = 0;
00048 
00049 #ifndef KDE_USE_FINAL
00050 #include "grammar.h"
00051 #endif
00052 
00053 #include "lexer.lut.h"
00054 
00055 extern YYLTYPE yylloc; // global bison variable holding token info
00056 
00057 // a bridge for yacc from the C world to C++
00058 int kjsyylex()
00059 {
00060   return Lexer::curr()->lex();
00061 }
00062 
00063 Lexer::Lexer()
00064   : yylineno(1),
00065     size8(128), size16(128), restrKeyword(false),
00066     eatNextIdentifier(false), stackToken(-1), lastToken(-1), pos(0),
00067     code(0), length(0),
00068 #ifndef KJS_PURE_ECMA
00069     bol(true),
00070 #endif
00071     current(0), next1(0), next2(0), next3(0),
00072     strings(0), numStrings(0), stringsCapacity(0),
00073     identifiers(0), numIdentifiers(0), identifiersCapacity(0)
00074 {
00075   // allocate space for read buffers
00076   buffer8 = new char[size8];
00077   buffer16 = new UChar[size16];
00078   currLexer = this;
00079 }
00080 
00081 Lexer::~Lexer()
00082 {
00083   delete [] buffer8;
00084   delete [] buffer16;
00085 }
00086 
00087 Lexer *Lexer::curr()
00088 {
00089   if (!currLexer) {
00090     // create singleton instance
00091     currLexer = new Lexer();
00092   }
00093   return currLexer;
00094 }
00095 
#ifdef KJS_DEBUG_MEM
// Destroys the current lexer (if any) and clears the global pointer.
void Lexer::globalClear()
{
  Lexer *doomed = currLexer;
  currLexer = 0L;
  delete doomed;
}
#endif
00103 
00104 void Lexer::setCode(const UChar *c, unsigned int len)
00105 {
00106   yylineno = 1;
00107   restrKeyword = false;
00108   delimited = false;
00109   eatNextIdentifier = false;
00110   stackToken = -1;
00111   lastToken = -1;
00112   foundBad = false;
00113   pos = 0;
00114   code = c;
00115   length = len;
00116   skipLF = false;
00117   skipCR = false;
00118 #ifndef KJS_PURE_ECMA
00119   bol = true;
00120 #endif
00121 
00122   // read first characters
00123   current = (length > 0) ? code[0].uc : 0;
00124   next1 = (length > 1) ? code[1].uc : 0;
00125   next2 = (length > 2) ? code[2].uc : 0;
00126   next3 = (length > 3) ? code[3].uc : 0;
00127 }
00128 
00129 void Lexer::shift(unsigned int p)
00130 {
00131   while (p--) {
00132     pos++;
00133     current = next1;
00134     next1 = next2;
00135     next2 = next3;
00136     next3 = (pos + 3 < length) ? code[pos+3].uc : 0;
00137   }
00138 }
00139 
00140 // called on each new line
00141 void Lexer::nextLine()
00142 {
00143   yylineno++;
00144 #ifndef KJS_PURE_ECMA
00145   bol = true;
00146 #endif
00147 }
00148 
00149 void Lexer::setDone(State s)
00150 {
00151   state = s;
00152   done = true;
00153 }
00154 
00155 int Lexer::lex()
00156 {
00157   int token = 0;
00158   state = Start;
00159   unsigned short stringType = 0; // either single or double quotes
00160   pos8 = pos16 = 0;
00161   done = false;
00162   terminator = false;
00163   skipLF = false;
00164   skipCR = false;
00165 
00166   // did we push a token on the stack previously ?
00167   // (after an automatic semicolon insertion)
00168   if (stackToken >= 0) {
00169     setDone(Other);
00170     token = stackToken;
00171     stackToken = 0;
00172   }
00173 
00174   while (!done) {
00175     if (skipLF && current != '\n') // found \r but not \n afterwards
00176         skipLF = false;
00177     if (skipCR && current != '\r') // found \n but not \r afterwards
00178         skipCR = false;
00179     if (skipLF || skipCR) // found \r\n or \n\r -> eat the second one
00180     {
00181         skipLF = false;
00182         skipCR = false;
00183         shift(1);
00184     }
00185 
00186     bool cr = (current == '\r');
00187     bool lf = (current == '\n');
00188     if (cr)
00189       skipLF = true;
00190     else if (lf)
00191       skipCR = true;
00192     bool isLineTerminator = cr || lf;
00193 
00194     switch (state) {
00195     case Start:
00196       if (isWhiteSpace(current)) {
00197         // do nothing
00198       } else if (current == '/' && next1 == '/') {
00199         shift(1);
00200         state = InSingleLineComment;
00201       } else if (current == '/' && next1 == '*') {
00202         shift(1);
00203         state = InMultiLineComment;
00204       } else if (current == 0) {
00205         if (!terminator && !delimited) {
00206           // automatic semicolon insertion if program incomplete
00207           token = ';';
00208           stackToken = 0;
00209           setDone(Other);
00210         } else
00211           setDone(Eof);
00212       } else if (isLineTerminator) {
00213         nextLine();
00214         terminator = true;
00215         if (restrKeyword) {
00216           token = ';';
00217           setDone(Other);
00218         }
00219       } else if (current == '"' || current == '\'') {
00220         state = InString;
00221         stringType = current;
00222       } else if (isIdentLetter(current)) {
00223         record16(current);
00224         state = InIdentifier;
00225       } else if (current == '0') {
00226         record8(current);
00227         state = InNum0;
00228       } else if (isDecimalDigit(current)) {
00229         record8(current);
00230         state = InNum;
00231       } else if (current == '.' && isDecimalDigit(next1)) {
00232         record8(current);
00233         state = InDecimal;
00234 #ifndef KJS_PURE_ECMA
00235         // <!-- marks the beginning of a line comment (for www usage)
00236       } else if (current == '<' && next1 == '!' &&
00237                  next2 == '-' && next3 == '-') {
00238         shift(3);
00239         state = InSingleLineComment;
00240         // same for -->
00241       } else if (bol && current == '-' && next1 == '-' &&  next2 == '>') {
00242         shift(2);
00243         state = InSingleLineComment;
00244 #endif
00245       } else {
00246         token = matchPunctuator(current, next1, next2, next3);
00247         if (token != -1) {
00248           setDone(Other);
00249         } else {
00250           //      cerr << "encountered unknown character" << endl;
00251           setDone(Bad);
00252         }
00253       }
00254       break;
00255     case InString:
00256       if (current == stringType) {
00257         shift(1);
00258         setDone(String);
00259       } else if (current == 0 || isLineTerminator) {
00260         setDone(Bad);
00261       } else if (current == '\\') {
00262         state = InEscapeSequence;
00263       } else {
00264         record16(current);
00265       }
00266       break;
00267     // Escape Sequences inside of strings
00268     case InEscapeSequence:
00269       if (isOctalDigit(current)) {
00270         if (current >= '0' && current <= '3' &&
00271             isOctalDigit(next1) && isOctalDigit(next2)) {
00272           record16(convertOctal(current, next1, next2));
00273           shift(2);
00274           state = InString;
00275         } else if (isOctalDigit(current) && isOctalDigit(next1)) {
00276           record16(convertOctal('0', current, next1));
00277           shift(1);
00278           state = InString;
00279         } else if (isOctalDigit(current)) {
00280           record16(convertOctal('0', '0', current));
00281           state = InString;
00282         } else {
00283           setDone(Bad);
00284         }
00285       } else if (current == 'x')
00286         state = InHexEscape;
00287       else if (current == 'u')
00288         state = InUnicodeEscape;
00289       else {
00290     if (isLineTerminator)
00291       nextLine();
00292         record16(singleEscape(current));
00293         state = InString;
00294       }
00295       break;
00296     case InHexEscape:
00297       if (isHexDigit(current) && isHexDigit(next1)) {
00298         state = InString;
00299         record16(convertHex(current, next1));
00300         shift(1);
00301       } else if (current == stringType) {
00302         record16('x');
00303         shift(1);
00304         setDone(String);
00305       } else {
00306         record16('x');
00307         record16(current);
00308         state = InString;
00309       }
00310       break;
00311     case InUnicodeEscape:
00312       if (isHexDigit(current) && isHexDigit(next1) &&
00313           isHexDigit(next2) && isHexDigit(next3)) {
00314         record16(convertUnicode(current, next1, next2, next3));
00315         shift(3);
00316         state = InString;
00317       } else if (current == stringType) {
00318         record16('u');
00319         shift(1);
00320         setDone(String);
00321       } else {
00322         setDone(Bad);
00323       }
00324       break;
00325     case InSingleLineComment:
00326       if (isLineTerminator) {
00327         nextLine();
00328         terminator = true;
00329         if (restrKeyword) {
00330           token = ';';
00331           setDone(Other);
00332         } else
00333           state = Start;
00334       } else if (current == 0) {
00335         setDone(Eof);
00336       }
00337       break;
00338     case InMultiLineComment:
00339       if (current == 0) {
00340         setDone(Bad);
00341       } else if (isLineTerminator) {
00342         nextLine();
00343       } else if (current == '*' && next1 == '/') {
00344         state = Start;
00345         shift(1);
00346       }
00347       break;
00348     case InIdentifier:
00349       if (isIdentLetter(current) || isDecimalDigit(current)) {
00350         record16(current);
00351         break;
00352       }
00353       setDone(Identifier);
00354       break;
00355     case InNum0:
00356       if (current == 'x' || current == 'X') {
00357         record8(current);
00358         state = InHex;
00359       } else if (current == '.') {
00360         record8(current);
00361         state = InDecimal;
00362       } else if (current == 'e' || current == 'E') {
00363         record8(current);
00364         state = InExponentIndicator;
00365       } else if (isOctalDigit(current)) {
00366         record8(current);
00367         state = InOctal;
00368       } else if (isDecimalDigit(current)) {
00369         record8(current);
00370         state = InDecimal;
00371       } else {
00372         setDone(Number);
00373       }
00374       break;
00375     case InHex:
00376       if (isHexDigit(current)) {
00377         record8(current);
00378       } else {
00379         setDone(Hex);
00380       }
00381       break;
00382     case InOctal:
00383       if (isOctalDigit(current)) {
00384         record8(current);
00385       }
00386       else if (isDecimalDigit(current)) {
00387         record8(current);
00388         state = InDecimal;
00389       } else
00390         setDone(Octal);
00391       break;
00392     case InNum:
00393       if (isDecimalDigit(current)) {
00394         record8(current);
00395       } else if (current == '.') {
00396         record8(current);
00397         state = InDecimal;
00398       } else if (current == 'e' || current == 'E') {
00399         record8(current);
00400         state = InExponentIndicator;
00401       } else
00402         setDone(Number);
00403       break;
00404     case InDecimal:
00405       if (isDecimalDigit(current)) {
00406         record8(current);
00407       } else if (current == 'e' || current == 'E') {
00408         record8(current);
00409         state = InExponentIndicator;
00410       } else
00411         setDone(Number);
00412       break;
00413     case InExponentIndicator:
00414       if (current == '+' || current == '-') {
00415         record8(current);
00416       } else if (isDecimalDigit(current)) {
00417         record8(current);
00418         state = InExponent;
00419       } else
00420         setDone(Bad);
00421       break;
00422     case InExponent:
00423       if (isDecimalDigit(current)) {
00424         record8(current);
00425       } else
00426         setDone(Number);
00427       break;
00428     default:
00429       assert(!"Unhandled state in switch statement");
00430     }
00431 
00432     // move on to the next character
00433     if (!done)
00434       shift(1);
00435 #ifndef KJS_PURE_ECMA
00436     if (state != Start && state != InSingleLineComment)
00437       bol = false;
00438 #endif
00439   }
00440 
00441   // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
00442   if ((state == Number || state == Octal || state == Hex)
00443       && isIdentLetter(current))
00444     state = Bad;
00445 
00446   // terminate string
00447   buffer8[pos8] = '\0';
00448 
00449 #ifdef KJS_DEBUG_LEX
00450   fprintf(stderr, "line: %d ", lineNo());
00451   fprintf(stderr, "yytext (%x): ", buffer8[0]);
00452   fprintf(stderr, "%s ", buffer8);
00453 #endif
00454 
00455   long double dval = 0;
00456   if (state == Number) {
00457     dval = kjs_strtod(buffer8, 0L);
00458   } else if (state == Hex) { // scan hex numbers
00459     dval = 0;
00460     if (buffer8[0] == '0' && (buffer8[1] == 'x' || buffer8[1] == 'X')) {
00461       for (const char *p = buffer8+2; *p; p++) {
00462     if (!isHexDigit(*p)) {
00463       dval = 0;
00464       break;
00465     }
00466     dval = dval * 16 + convertHex(*p);
00467       }
00468     }
00469     state = Number;
00470   } else if (state == Octal) {   // scan octal number
00471     dval = 0;
00472     if (buffer8[0] == '0') {
00473       for (const char *p = buffer8+1; *p; p++) {
00474     if (*p < '0' || *p > '7') {
00475       dval = 0;
00476       break;
00477     }
00478     dval = dval * 8 + *p - '0';
00479       }
00480     }
00481     state = Number;
00482   }
00483 
00484 #ifdef KJS_DEBUG_LEX
00485   switch (state) {
00486   case Eof:
00487     printf("(EOF)\n");
00488     break;
00489   case Other:
00490     printf("(Other)\n");
00491     break;
00492   case Identifier:
00493     printf("(Identifier)/(Keyword)\n");
00494     break;
00495   case String:
00496     printf("(String)\n");
00497     break;
00498   case Number:
00499     printf("(Number)\n");
00500     break;
00501   default:
00502     printf("(unknown)");
00503   }
00504 #endif
00505 
00506   if (state != Identifier && eatNextIdentifier)
00507     eatNextIdentifier = false;
00508 
00509   restrKeyword = false;
00510   delimited = false;
00511   kjsyylloc.first_line = yylineno; // ???
00512   kjsyylloc.last_line = yylineno;
00513 
00514   switch (state) {
00515   case Eof:
00516     token = 0;
00517     break;
00518   case Other:
00519     if(token == '}' || token == ';') {
00520       delimited = true;
00521     }
00522     break;
00523   case Identifier:
00524     if ((token = Lookup::find(&mainTable, buffer16, pos16)) < 0) {
00525       // Lookup for keyword failed, means this is an identifier
00526       // Apply anonymous-function hack below (eat the identifier)
00527       if (eatNextIdentifier) {
00528         eatNextIdentifier = false;
00529 #ifdef KJS_VERBOSE
00530         UString debugstr(buffer16, pos16); fprintf(stderr,"Anonymous function hack: eating identifier %s\n",debugstr.ascii());
00531 #endif
00532         token = lex();
00533         break;
00534       }
00535       /* TODO: close leak on parse error. same holds true for String */
00536       kjsyylval.ident = makeIdentifier(buffer16, pos16);
00537       token = IDENT;
00538       break;
00539     }
00540 
00541     eatNextIdentifier = false;
00542     // Hack for "f = function somename() { ... }", too hard to get into the grammar
00543     // Same for building an array with function pointers ( 'name', func1, 'name2', func2 )
00544     // There are lots of other uses, we really have to get this into the grammar
00545     if ( token == FUNCTION &&
00546          ( lastToken == '=' || lastToken == ',' || lastToken == '(' ) )
00547             eatNextIdentifier = true;
00548 
00549     if (token == CONTINUE || token == BREAK ||
00550         token == RETURN || token == THROW)
00551       restrKeyword = true;
00552     break;
00553   case String:
00554     kjsyylval.ustr = makeUString(buffer16, pos16);
00555     token = STRING;
00556     break;
00557   case Number:
00558     kjsyylval.dval = dval;
00559     token = NUMBER;
00560     break;
00561   case Bad:
00562     foundBad = true;
00563     return -1;
00564   default:
00565     assert(!"unhandled numeration value in switch");
00566     return -1;
00567   }
00568   lastToken = token;
00569   return token;
00570 }
00571 
00572 bool Lexer::isWhiteSpace(unsigned short c)
00573 {
00574   return (c == ' ' || c == '\t' ||
00575           c == 0x0b || c == 0x0c || c == 0xa0);
00576 }
00577 
00578 bool Lexer::isIdentLetter(unsigned short c)
00579 {
00580   // Allow any character in the Unicode categories
00581   // Uppercase letter (Lu), Lowercase letter (Ll),
00582   // Titlecase letter (Lt)", Modifier letter (Lm),
00583   // Other letter (Lo), or Letter number (Nl).
00584   // Also see: http://www.unicode.org/Public/UNIDATA/UnicodeData.txt */
00585   return (c >= 'a' && c <= 'z' ||
00586           c >= 'A' && c <= 'Z' ||
00587           // A with grave - O with diaeresis
00588           c >= 0x00c0 && c <= 0x00d6 ||
00589           // O with stroke - o with diaeresis
00590           c >= 0x00d8 && c <= 0x00f6 ||
00591           // o with stroke - turned h with fishook and tail
00592           c >= 0x00f8 && c <= 0x02af ||
00593           // Greek etc. TODO: not precise
00594           c >= 0x0388 && c <= 0x1ffc ||
00595           c == '$' || c == '_');
00596   /* TODO: use complete category table */
00597 }
00598 
00599 bool Lexer::isDecimalDigit(unsigned short c)
00600 {
00601   return (c >= '0' && c <= '9');
00602 }
00603 
00604 bool Lexer::isHexDigit(unsigned short c)
00605 {
00606   return (c >= '0' && c <= '9' ||
00607           c >= 'a' && c <= 'f' ||
00608           c >= 'A' && c <= 'F');
00609 }
00610 
00611 bool Lexer::isOctalDigit(unsigned short c)
00612 {
00613   return (c >= '0' && c <= '7');
00614 }
00615 
00616 int Lexer::matchPunctuator(unsigned short c1, unsigned short c2,
00617                               unsigned short c3, unsigned short c4)
00618 {
00619   if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
00620     shift(4);
00621     return URSHIFTEQUAL;
00622   } else if (c1 == '=' && c2 == '=' && c3 == '=') {
00623     shift(3);
00624     return STREQ;
00625   } else if (c1 == '!' && c2 == '=' && c3 == '=') {
00626     shift(3);
00627     return STRNEQ;
00628    } else if (c1 == '>' && c2 == '>' && c3 == '>') {
00629     shift(3);
00630     return URSHIFT;
00631   } else if (c1 == '<' && c2 == '<' && c3 == '=') {
00632     shift(3);
00633     return LSHIFTEQUAL;
00634   } else if (c1 == '>' && c2 == '>' && c3 == '=') {
00635     shift(3);
00636     return RSHIFTEQUAL;
00637   } else if (c1 == '<' && c2 == '=') {
00638     shift(2);
00639     return LE;
00640   } else if (c1 == '>' && c2 == '=') {
00641     shift(2);
00642     return GE;
00643   } else if (c1 == '!' && c2 == '=') {
00644     shift(2);
00645     return NE;
00646   } else if (c1 == '+' && c2 == '+') {
00647     shift(2);
00648     if (terminator)
00649       return AUTOPLUSPLUS;
00650     else
00651       return PLUSPLUS;
00652   } else if (c1 == '-' && c2 == '-') {
00653     shift(2);
00654     if (terminator)
00655       return AUTOMINUSMINUS;
00656     else
00657       return MINUSMINUS;
00658   } else if (c1 == '=' && c2 == '=') {
00659     shift(2);
00660     return EQEQ;
00661   } else if (c1 == '+' && c2 == '=') {
00662     shift(2);
00663     return PLUSEQUAL;
00664   } else if (c1 == '-' && c2 == '=') {
00665     shift(2);
00666     return MINUSEQUAL;
00667   } else if (c1 == '*' && c2 == '=') {
00668     shift(2);
00669     return MULTEQUAL;
00670   } else if (c1 == '/' && c2 == '=') {
00671     shift(2);
00672     return DIVEQUAL;
00673   } else if (c1 == '&' && c2 == '=') {
00674     shift(2);
00675     return ANDEQUAL;
00676   } else if (c1 == '^' && c2 == '=') {
00677     shift(2);
00678     return XOREQUAL;
00679   } else if (c1 == '%' && c2 == '=') {
00680     shift(2);
00681     return MODEQUAL;
00682   } else if (c1 == '|' && c2 == '=') {
00683     shift(2);
00684     return OREQUAL;
00685   } else if (c1 == '<' && c2 == '<') {
00686     shift(2);
00687     return LSHIFT;
00688   } else if (c1 == '>' && c2 == '>') {
00689     shift(2);
00690     return RSHIFT;
00691   } else if (c1 == '&' && c2 == '&') {
00692     shift(2);
00693     return AND;
00694   } else if (c1 == '|' && c2 == '|') {
00695     shift(2);
00696     return OR;
00697   }
00698 
00699   switch(c1) {
00700     case '=':
00701     case '>':
00702     case '<':
00703     case ',':
00704     case '!':
00705     case '~':
00706     case '?':
00707     case ':':
00708     case '.':
00709     case '+':
00710     case '-':
00711     case '*':
00712     case '/':
00713     case '&':
00714     case '|':
00715     case '^':
00716     case '%':
00717     case '(':
00718     case ')':
00719     case '{':
00720     case '}':
00721     case '[':
00722     case ']':
00723     case ';':
00724       shift(1);
00725       return static_cast<int>(c1);
00726     default:
00727       return -1;
00728   }
00729 }
00730 
00731 unsigned short Lexer::singleEscape(unsigned short c) const
00732 {
00733   switch(c) {
00734   case 'b':
00735     return 0x08;
00736   case 't':
00737     return 0x09;
00738   case 'n':
00739     return 0x0A;
00740   case 'v':
00741     return 0x0B;
00742   case 'f':
00743     return 0x0C;
00744   case 'r':
00745     return 0x0D;
00746   case '"':
00747     return 0x22;
00748   case '\'':
00749     return 0x27;
00750   case '\\':
00751     return 0x5C;
00752   default:
00753     return c;
00754   }
00755 }
00756 
00757 unsigned short Lexer::convertOctal(unsigned short c1, unsigned short c2,
00758                                       unsigned short c3) const
00759 {
00760   return ((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
00761 }
00762 
00763 unsigned char Lexer::convertHex(unsigned short c)
00764 {
00765   if (c >= '0' && c <= '9')
00766     return (c - '0');
00767   else if (c >= 'a' && c <= 'f')
00768     return (c - 'a' + 10);
00769   else
00770     return (c - 'A' + 10);
00771 }
00772 
00773 unsigned char Lexer::convertHex(unsigned short c1, unsigned short c2)
00774 {
00775   return ((convertHex(c1) << 4) + convertHex(c2));
00776 }
00777 
00778 UChar Lexer::convertUnicode(unsigned short c1, unsigned short c2,
00779                                      unsigned short c3, unsigned short c4)
00780 {
00781   return UChar((convertHex(c1) << 4) + convertHex(c2),
00782                (convertHex(c3) << 4) + convertHex(c4));
00783 }
00784 
00785 void Lexer::record8(unsigned short c)
00786 {
00787   assert(c <= 0xff);
00788 
00789   // enlarge buffer if full
00790   if (pos8 >= size8 - 1) {
00791     char *tmp = new char[2 * size8];
00792     memcpy(tmp, buffer8, size8 * sizeof(char));
00793     delete [] buffer8;
00794     buffer8 = tmp;
00795     size8 *= 2;
00796   }
00797 
00798   buffer8[pos8++] = (char) c;
00799 }
00800 
00801 void Lexer::record16(UChar c)
00802 {
00803   // enlarge buffer if full
00804   if (pos16 >= size16 - 1) {
00805     UChar *tmp = new UChar[2 * size16];
00806     memcpy(tmp, buffer16, size16 * sizeof(UChar));
00807     delete [] buffer16;
00808     buffer16 = tmp;
00809     size16 *= 2;
00810   }
00811 
00812   buffer16[pos16++] = c;
00813 }
00814 
00815 bool Lexer::scanRegExp()
00816 {
00817   pos16 = 0;
00818   bool lastWasEscape = false;
00819   bool inBrackets = false;
00820 
00821   while (1) {
00822     if (current == '\r' || current == '\n' || current == 0)
00823       return false;
00824     else if (current != '/' || lastWasEscape == true || inBrackets == true)
00825     {
00826         // keep track of '[' and ']'
00827         if ( !lastWasEscape ) {
00828           if ( current == '[' && !inBrackets )
00829             inBrackets = true;
00830           if ( current == ']' && inBrackets )
00831             inBrackets = false;
00832         }
00833         record16(current);
00834         lastWasEscape =
00835             !lastWasEscape && (current == '\\');
00836     }
00837     else { // end of regexp
00838       pattern = UString(buffer16, pos16);
00839       pos16 = 0;
00840       shift(1);
00841       break;
00842     }
00843     shift(1);
00844   }
00845 
00846   while (isIdentLetter(current)) {
00847     record16(current);
00848     shift(1);
00849   }
00850   flags = UString(buffer16, pos16);
00851 
00852   return true;
00853 }
00854 
00855 
00856 void Lexer::doneParsing()
00857 {
00858   for (unsigned i = 0; i < numIdentifiers; i++) {
00859     delete identifiers[i];
00860   }
00861   free(identifiers);
00862   identifiers = 0;
00863   numIdentifiers = 0;
00864   identifiersCapacity = 0;
00865 
00866   for (unsigned i = 0; i < numStrings; i++) {
00867     delete strings[i];
00868   }
00869   free(strings);
00870   strings = 0;
00871   numStrings = 0;
00872   stringsCapacity = 0;
00873 }
00874 
// Sizing policy of the identifier/string bookkeeping arrays:
// number of slots allocated on first use ...
static const int initialCapacity = 64;
// ... and the factor the capacity is multiplied by whenever they are full.
static const int growthFactor = 2;
00877 
00878 Identifier *Lexer::makeIdentifier(UChar *buffer, unsigned int pos)
00879 {
00880   if (numIdentifiers == identifiersCapacity) {
00881     identifiersCapacity = (identifiersCapacity == 0) ? initialCapacity : identifiersCapacity *growthFactor;
00882     identifiers = (KJS::Identifier **)realloc(identifiers, sizeof(KJS::Identifier *) * identifiersCapacity);
00883   }
00884 
00885   KJS::Identifier *identifier = new KJS::Identifier(buffer, pos);
00886   identifiers[numIdentifiers++] = identifier;
00887   return identifier;
00888 }
00889 
00890 UString *Lexer::makeUString(UChar *buffer, unsigned int pos)
00891 {
00892   if (numStrings == stringsCapacity) {
00893     stringsCapacity = (stringsCapacity == 0) ? initialCapacity : stringsCapacity *growthFactor;
00894     strings = (UString **)realloc(strings, sizeof(UString *) * stringsCapacity);
00895   }
00896 
00897   UString *string = new UString(buffer, pos);
00898   strings[numStrings++] = string;
00899   return string;
00900 }
KDE Home | KDE Accessibility Home | Description of Access Keys