import std.string;
import std.stdio;
import std.c.stdlib;

//debug = minor;
//debug = reallyminor;
//debug = lastoutput;

/**
 * Node of the outline produced by PhpParser.parseAll(): a named element with
 * the source lines it spans and its child elements.
 */
class PhpElement {
    char[] name;
    int startline;
    int endline;
    char[] getTypeName() { return "Element"; }
    PhpElement[] elements;
}

/** Root node of an outline; its elements are top-level classes and functions. */
class PhpFile : PhpElement {
    char[] getTypeName() { return "File"; }
}

/** Outline node for a PHP class; its elements are the methods found inside it. */
class PhpClass : PhpElement {
    char[] getTypeName() { return "Class"; }
}

/** Outline node for a PHP function or method. */
class PhpMethod : PhpElement {
    char[] getTypeName() { return "Method"; }
}

/**
 * Hand-written tokenizer for PHP source plus a small outline builder.
 *
 * parseNext() returns one token id per call (0 when nothing more can be
 * tokenized); parseAll() tokenizes the whole input and returns a PhpFile tree
 * of the classes and functions it found.
 */
class PhpParser {
    char[] input;

    enum states {
        INITIAL = 100,
        ST_IN_SCRIPTING,
        ST_DOUBLE_QUOTES,
        ST_SINGLE_QUOTE,
        ST_BACKQUOTE,
        ST_HEREDOC,
        ST_LOOKING_FOR_PROPERTY,
        ST_LOOKING_FOR_VARNAME,
        ST_COMMENT,
        ST_DOC_COMMENT,
        ST_ONE_LINE_COMMENT
    }

    // Token ids. Single-character tokens use their character code instead.
    const int T_REQUIRE_ONCE = 258; const int T_REQUIRE = 259; const int T_EVAL = 260;
    const int T_INCLUDE_ONCE = 261; const int T_INCLUDE = 262; const int T_LOGICAL_OR = 263;
    const int T_LOGICAL_XOR = 264; const int T_LOGICAL_AND = 265; const int T_PRINT = 266;
    const int T_SR_EQUAL = 267; const int T_SL_EQUAL = 268; const int T_XOR_EQUAL = 269;
    const int T_OR_EQUAL = 270; const int T_AND_EQUAL = 271; const int T_MOD_EQUAL = 272;
    const int T_CONCAT_EQUAL = 273; const int T_DIV_EQUAL = 274; const int T_MUL_EQUAL = 275;
    const int T_MINUS_EQUAL = 276; const int T_PLUS_EQUAL = 277; const int T_BOOLEAN_OR = 278;
    const int T_BOOLEAN_AND = 279; const int T_IS_NOT_IDENTICAL = 280; const int T_IS_IDENTICAL = 281;
    const int T_IS_NOT_EQUAL = 282; const int T_IS_EQUAL = 283; const int T_IS_GREATER_OR_EQUAL = 284;
    const int T_IS_SMALLER_OR_EQUAL = 285; const int T_SR = 286; const int T_SL = 287;
    const int T_INSTANCEOF = 288; const int T_UNSET_CAST = 289; const int T_BOOL_CAST = 290;
    const int T_OBJECT_CAST = 291; const int T_ARRAY_CAST = 292; const int T_STRING_CAST = 293;
    const int T_DOUBLE_CAST = 294; const int T_INT_CAST = 295; const int T_DEC = 296;
    const int T_INC = 297; const int T_CLONE = 298; const int T_NEW = 299;
    const int T_EXIT = 300; const int T_IF = 301; const int T_ELSEIF = 302;
    const int T_ELSE = 303; const int T_ENDIF = 304; const int T_LNUMBER = 305;
    const int T_DNUMBER = 306; const int T_STRING = 307; const int T_STRING_VARNAME = 308;
    const int T_VARIABLE = 309; const int T_NUM_STRING = 310; const int T_INLINE_HTML = 311;
    const int T_CHARACTER = 312; const int T_BAD_CHARACTER = 313; const int T_ENCAPSED_AND_WHITESPACE = 314;
    const int T_CONSTANT_ENCAPSED_STRING = 315; const int T_ECHO = 316; const int T_DO = 317;
    const int T_WHILE = 318; const int T_ENDWHILE = 319; const int T_FOR = 320;
    const int T_ENDFOR = 321; const int T_FOREACH = 322; const int T_ENDFOREACH = 323;
    const int T_DECLARE = 324; const int T_ENDDECLARE = 325; const int T_AS = 326;
    const int T_SWITCH = 327; const int T_ENDSWITCH = 328; const int T_CASE = 329;
    const int T_DEFAULT = 330; const int T_BREAK = 331; const int T_CONTINUE = 332;
    const int T_FUNCTION = 333; const int T_CONST = 334; const int T_RETURN = 335;
    const int T_TRY = 336; const int T_CATCH = 337; const int T_THROW = 338;
    const int T_USE = 339; const int T_GLOBAL = 340; const int T_PUBLIC = 341;
    const int T_PROTECTED = 342; const int T_PRIVATE = 343; const int T_FINAL = 344;
    const int T_ABSTRACT = 345; const int T_STATIC = 346; const int T_VAR = 347;
    const int T_UNSET = 348; const int T_ISSET = 349; const int T_EMPTY = 350;
    const int T_HALT_COMPILER = 351; const int T_CLASS = 352; const int T_INTERFACE = 353;
    const int T_EXTENDS = 354; const int T_IMPLEMENTS = 355; const int T_OBJECT_OPERATOR = 356;
    const int T_DOUBLE_ARROW = 357; const int T_LIST = 358; const int T_ARRAY = 359;
    const int T_CLASS_C = 360; const int T_METHOD_C = 361; const int T_FUNC_C = 362;
    const int T_LINE = 363; const int T_FILE = 364; const int T_COMMENT = 365;
    const int T_DOC_COMMENT = 366; const int T_OPEN_TAG = 367; const int T_OPEN_TAG_WITH_ECHO = 368;
    const int T_CLOSE_TAG = 369; const int T_WHITESPACE = 370; const int T_START_HEREDOC = 371;
    const int T_END_HEREDOC = 372; const int T_DOLLAR_OPEN_CURLY_BRACES = 373; const int T_CURLY_OPEN = 374;
    const int T_PAAMAYIM_NEKUDOTAYIM = 375;
    const int T_ONE_LINE_COMMENT = 400;

    states currentState;
    states[] stateStack;
    int yypos;      // scan position: index of the next unread character of input
    int yystart;    // start index of the token currently being built by parseAll()
    int line;       // bumped by some scanners, speculative matches included; token
                    // line numbers are computed separately in parseAll()

    /** One lexed token: id, matched text, 1-based line, and offset into input. */
    class Token {
        int t;
        char[] str;
        int line;
        int pos;
    }

    Token[int] tokens;      // tokens keyed by their start offset
    Token[] tokenOrdered;   // tokens in source order

    /** Creates a parser over str, loads the literal token table and enters INITIAL. */
    this(char[] str)
    {
        input = str;
        loadMatchTokens();
        pushState(states.INITIAL);
        yypos = 0;
        yystart = 0;
        this.line = 0;
        debug(reallyminor) printf("got input %s\n", toStringz(input));
    }

    /**
     * Tokenizes the whole input, assigns a line number to every token and
     * builds the class/function outline.
     *
     * Returns: a PhpFile whose elements are the top-level classes and
     * functions; methods are attached to the class they appear in.
     */
    PhpElement parseAll()
    {
        yypos = 0;
        yystart = 0;

        // Pass 1: tokenize until parseNext() reports no further token.
        for (int c = this.parseNext(); c > 0; c = this.parseNext()) {
            // Safety net only: the scanners no longer step past the end.
            if (yypos > input.length) {
                yypos = input.length;
            }
            auto tok = new Token();
            tok.t = c;
            tok.str = input[yystart .. yypos];
            tok.line = 0;
            tok.pos = yystart;
            debug(lastoutput) printf("got token(%d/%d) %d, VAL: %s\n ", yystart, input.length, cast(int)c, toStringz(tok.str));
            tokens[yystart] = tok;
            tokenOrdered ~= tok;
            yystart = yypos;
        }

        // Pass 2: line numbers. The newline is counted before the token that
        // starts on it is tagged, so such a token gets the following line.
        int cline = 1;
        for (int p = 0; p < input.length; p++) {
            if (input[p] == '\n') {
                cline++;
            }
            if (p in tokens) {
                debug(lastoutput) writefln("setting pos ", p, " line to ", cline);
                tokens[p].line = cline;
            }
        }

        // Pass 3: outline. stack maps a brace depth to the element whose body
        // closes when the matching '}' at that depth is seen.
        PhpElement ret = new PhpFile();
        PhpElement[int] stack;
        PhpElement activeClass = null;
        int indent = 0;
        int i = 0;

        while (i < tokenOrdered.length) {
            switch (tokenOrdered[i].t) {
            case T_CLASS: {
                // class[WS]name: skip filler instead of assuming exactly one
                // whitespace token, and never index past the token list.
                i++;
                while (i < tokenOrdered.length
                        && (tokenOrdered[i].t == T_WHITESPACE
                            || tokenOrdered[i].t == T_COMMENT
                            || tokenOrdered[i].t == T_ONE_LINE_COMMENT)) {
                    i++;
                }
                if (i < tokenOrdered.length && tokenOrdered[i].t == T_STRING) {
                    writefln("GOT CLASS:", tokenOrdered[i].str, " on line ", tokenOrdered[i].line);
                    PhpElement e = new PhpClass;
                    e.name = tokenOrdered[i].str;
                    e.startline = tokenOrdered[i].line;
                    ret.elements ~= e;
                    stack[indent + 1] = e;
                    activeClass = e;
                    i++;
                }
                break;
            }

            case T_FUNCTION: {
                // function[WS][&]name( : the first T_STRING before '(' is the
                // name. Anonymous functions reach '(' first and are skipped.
                while ((i < tokenOrdered.length) && (tokenOrdered[i].t != cast(int)'(')) {
                    Token cand = tokenOrdered[i];
                    i++;
                    if (cand.t != T_STRING) {
                        continue;
                    }
                    writefln("GOT FUNCTION:", cand.str, " on line ", cand.line);
                    PhpElement e = new PhpMethod;
                    e.name = cand.str;
                    e.startline = cand.line;
                    if (activeClass) {
                        activeClass.elements ~= e;
                    } else {
                        ret.elements ~= e;
                    }
                    stack[indent + 1] = e;
                    break;
                }
                break;
            }

            case cast(int) '{':
                indent++;
                i++;
                break;

            case cast(int) '}':
                if (indent in stack) {
                    PhpElement closing = stack[indent];
                    closing.endline = tokenOrdered[i].line;
                    // Leaving the class body: later functions are top-level.
                    if (closing is activeClass) {
                        activeClass = null;
                    }
                    stack.remove(indent);
                }
                indent--;
                i++;
                break;

            default:
                i++;
            }
        }
        return ret;
    }

    /**
     * Literal match: if the parser is in state s and input continues with str
     * at yypos, consumes it and returns true. yypos is untouched on failure.
     */
    bool stateTest(states s, char[] str)
    {
        if (s != currentState) {
            return false;
        }
        debug(minor) printf(" STATETEST input %d/%d len=%d, %s\n", yypos, input.length, str.length, toStringz(str));
        if (!inputHas(yypos, str)) {
            debug(minor) writefln(" --no match");
            return false;
        }
        yypos += str.length;
        debug(minor) writefln(" ++MATCHED");
        return true;
    }

    /** Pushes s on the state stack and makes it the current state. */
    void pushState(states s)
    {
        debug(minor) writefln(" ++ADD STACK", cast(int)s);
        stateStack ~= s;
        this.currentState = s;
    }

    /** Debug aid: prints the state stack when built with debug=minor. */
    void dumpState()
    {
        debug(minor) writefln("----");
        debug(minor) writefln("GOT STACK LENGTH:", cast(int)stateStack.length);
        debug(minor) writefln("CURRENT STACK ITEM:", cast(int)currentState);
        foreach (s; stateStack) {
            debug(minor) writefln("GOT STACK ITEM:", cast(int)s);
        }
    }

    /**
     * Drops the current state and resumes the one beneath it. With nothing
     * beneath, the stack is reset to a single INITIAL entry.
     */
    void popState()
    {
        debug(minor) writefln(" --POP STACK");
        if (stateStack.length <= 1) {
            stateStack.length = 0;
            stateStack ~= states.INITIAL;
            this.currentState = states.INITIAL;
            return;
        }
        stateStack.length = stateStack.length - 1;
        this.currentState = stateStack[stateStack.length - 1];
    }

    /** True when s is the current lexer state. */
    bool isState(states s)
    {
        return this.currentState == s;
    }

    /** True when the scan position has reached the end of input. */
    bool isEof()
    {
        debug(minor) writefln(" **eof test: ", yypos, ">= ", input.length);
        return yypos >= input.length;
    }

    // ---- small character and lookahead helpers (none of them move yypos) ----

    /** Letters, '_' and bytes 0x7f-0xff may start a label. */
    private static bool labelStartChar(char c)
    {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c >= 0x7f;
    }

    /** Label-start characters plus digits may continue a label. */
    private static bool labelChar(char c)
    {
        return labelStartChar(c) || digitChar(c);
    }

    private static bool digitChar(char c)
    {
        return c >= '0' && c <= '9';
    }

    private static bool hexChar(char c)
    {
        return digitChar(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
    }

    /** True when c is one of the characters in set. */
    private static bool charIn(char c, char[] set)
    {
        foreach (m; set) {
            if (m == c) {
                return true;
            }
        }
        return false;
    }

    /** True when input contains str starting at pos. */
    private bool inputHas(int pos, char[] str)
    {
        if (pos + str.length > input.length) {
            return false;
        }
        return input[pos .. pos + str.length] == str;
    }

    /** Index of the first character at or after pos that is not space, tab, CR or LF. */
    private int skipWhitespace(int pos)
    {
        while (pos < input.length && charIn(input[pos], " \n\r\t")) {
            pos++;
        }
        return pos;
    }

    /**
     * If a script-style PHP open tag ("<script", whitespace, "language", "=",
     * php optionally quoted, ">") starts at pos, returns the index just past
     * its '>'; otherwise -1.
     */
    private int scriptOpenTagEnd(int pos)
    {
        if (!inputHas(pos, "<script")) {
            return -1;
        }
        int afterName = pos + 7;
        int q = skipWhitespace(afterName);
        if (q == afterName || !inputHas(q, "language")) {
            return -1;
        }
        q = skipWhitespace(q + 8);
        if (!inputHas(q, "=")) {
            return -1;
        }
        q = skipWhitespace(q + 1);
        if (inputHas(q, "\"php\"") || inputHas(q, "'php'")) {
            q += 5;
        } else if (inputHas(q, "php")) {
            q += 3;
        } else {
            return -1;
        }
        q = skipWhitespace(q);
        if (!inputHas(q, ">")) {
            return -1;
        }
        return q + 1;
    }

    /** True when a PHP open tag ("<?", "<%" or the script form) starts at pos. */
    private bool atOpenTag(int pos)
    {
        if (inputHas(pos, "<?") || inputHas(pos, "<%")) {
            return true;
        }
        return scriptOpenTagEnd(pos) >= 0;
    }

    /**
     * Matches [ \t]* word [ \t]* where word is `word` or, failing that, `alt`.
     * Restores yypos and returns false when neither word is present.
     */
    private bool matchPaddedWord(states s, char[] word, char[] alt = null)
    {
        int start = yypos;
        stateTestRe(s, "TABS_AND_SPACES");
        bool hit = stateTest(s, word);
        if (!hit && alt.length > 0) {
            hit = stateTest(s, alt);
        }
        if (!hit) {
            yypos = start;
            return false;
        }
        stateTestRe(s, "TABS_AND_SPACES");
        return true;
    }

    /**
     * Matches a string delimited by the one-character `quote`. A backslash
     * skips the character after it. Restores yypos on every failure path.
     */
    private bool matchQuoted(states s, char[] quote)
    {
        int start = yypos;
        if (!stateTest(s, quote)) {
            return false;
        }
        int y = yypos;
        while (y < input.length && input[y] != quote[0]) {
            if (input[y] == '\\') {
                y++;    // also step over the escaped character below
                if (y >= input.length) {
                    // backslash was the last character: unterminated
                    yypos = start;
                    return false;
                }
            }
            y++;
        }
        yypos = y;
        if (!stateTest(s, quote)) {
            yypos = start;
            return false;
        }
        return true;
    }

    /**
     * Named-pattern match. If the parser is in state s and the pattern called
     * `rege` matches at yypos, consumes the match and returns true; otherwise
     * leaves yypos where it was and returns false. Always false at end of
     * input. An unknown pattern name prints a message and exits the process.
     *
     * HEREDOC_BLOCK, COMMENT and ONE_LINE_COMMENT test against ST_IN_SCRIPTING
     * internally, whatever s is.
     */
    bool stateTestRe(states s, char[] rege)
    {
        if (currentState != s) {
            return false;
        }
        if (isEof()) {
            return false;
        }
        debug(minor) writefln(" stateTestRe State=", cast(int) s, " for regex: ", rege, " START='", input[yypos], "'");

        int start = yypos;
        int y = start;
        states sc = states.ST_IN_SCRIPTING;

        switch (rege) {

        case "LABEL": {
            // [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
            if (!labelStartChar(input[y])) {
                return false;
            }
            y++;
            while (y < input.length && labelChar(input[y])) {
                y++;
            }
            yypos = y;
            return true;
        }

        case "$LABEL": {
            if (input[y] != '$') {
                return false;
            }
            yypos = y + 1;
            if (!stateTestRe(s, "LABEL")) {
                yypos = start;
                return false;
            }
            return true;
        }

        case "LNUM": {
            // [0-9]+ ; digits running to end of input count as a match
            while (y < input.length && digitChar(input[y])) {
                y++;
            }
            if (y == start) {
                return false;
            }
            yypos = y;
            return true;
        }

        case "DNUM": {
            // ([0-9]*[\.][0-9]+)|([0-9]+[\.][0-9]*) : one dot and at least one digit
            bool gotDot = false;
            int digits = 0;
            while (y < input.length) {
                char c = input[y];
                if (digitChar(c)) {
                    digits++;
                } else if (!gotDot && c == '.') {
                    gotDot = true;
                } else {
                    break;
                }
                y++;
            }
            if (!gotDot || digits == 0) {
                return false;
            }
            yypos = y;
            return true;
        }

        case "EXPONENT_DNUM": {
            // (({LNUM}|{DNUM})[eE][+-]?{LNUM}) ; DNUM is tried first so that a
            // mantissa with a fraction is consumed whole
            if (!stateTestRe(s, "DNUM") && !stateTestRe(s, "LNUM")) {
                return false;
            }
            y = yypos;
            if (y >= input.length || (input[y] != 'e' && input[y] != 'E')) {
                yypos = start;
                return false;
            }
            y++;
            if (y < input.length && (input[y] == '+' || input[y] == '-')) {
                y++;
            }
            yypos = y;
            if (!stateTestRe(s, "LNUM")) {
                yypos = start;
                return false;
            }
            return true;
        }

        case "HNUM": {
            // "0x"[0-9a-fA-F]+
            if (!inputHas(y, "0x")) {
                return false;
            }
            y += 2;
            int firstDigit = y;
            while (y < input.length && hexChar(input[y])) {
                y++;
            }
            if (y == firstDigit) {
                return false;
            }
            yypos = y;
            return true;
        }

        case "WHITESPACE": {
            // [ \n\r\t]+
            while (y < input.length && charIn(input[y], " \n\r\t")) {
                if (input[y] == '\n') {
                    this.line++;
                }
                y++;
            }
            if (y == start) {
                return false;
            }
            yypos = y;
            debug(minor) writefln(" WHITESPACE RETURNING yypos=", yypos);
            return true;
        }

        case "TABS_AND_SPACES": {
            // [ \t]* : may match nothing, so it only ever advances yypos
            while (y < input.length && (input[y] == ' ' || input[y] == '\t')) {
                y++;
            }
            yypos = y;
            return true;
        }

        case "ENCAPSED_TOKENS": {
            // [\[\]{}$]
            if (!charIn(input[y], "[]{}$")) {
                return false;
            }
            yypos++;
            return true;
        }

        case "ESCAPED_AND_WHITESPACE": {
            // [\n\t\r #'.:;,()|^&+-/*=%!~<>?@]+
            while (y < input.length && charIn(input[y], "\n\t\r #'.:;,()|^&+-/*=%!~<>?@")) {
                if (input[y] == '\n') {
                    this.line++;
                }
                y++;
            }
            if (y == start) {
                return false;
            }
            yypos = y;
            return true;
        }

        case "ANY_CHAR": {
            if (input[y] == '\n') {
                this.line++;
            }
            yypos++;
            return true;
        }

        case "NEWLINE": {
            // ("\r"|"\n"|"\r\n")
            if (inputHas(y, "\r\n")) {
                yypos += 2;
                this.line++;
                return true;
            }
            if (input[y] == '\n' || input[y] == '\r') {
                if (input[y] == '\n') {
                    this.line++;
                }
                yypos++;
                return true;
            }
            return false;
        }

        // Inner part of a cast. parseNextInScripting() supplies the parentheses.
        case "TABS_AND_SPACES(int|integer)TABS_AND_SPACES":
            return matchPaddedWord(s, "integer", "int");
        case "TABS_AND_SPACESstringTABS_AND_SPACES":
            return matchPaddedWord(s, "string");
        case "TABS_AND_SPACESarrayTABS_AND_SPACES":
            return matchPaddedWord(s, "array");
        case "TABS_AND_SPACESobjectTABS_AND_SPACES":
            return matchPaddedWord(s, "object");
        case "TABS_AND_SPACESbool|booleanTABS_AND_SPACES":
            return matchPaddedWord(s, "boolean", "bool");
        case "TABS_AND_SPACESunsetTABS_AND_SPACES":
            return matchPaddedWord(s, "unset");

        case "STANDARD_START": {
            // NOTE(review): part of this case was missing from the text this
            // was rebuilt from. Reconstructed to accept "<?php", "<?" and the
            // script-style open tag; confirm against the intended behavior.
            if (stateTest(s, "<?php") || stateTest(s, "<?")) {
                return true;
            }
            int tagEnd = scriptOpenTagEnd(yypos);
            if (tagEnd < 0) {
                return false;
            }
            yypos = tagEnd;
            return true;
        }

        case "NOT_START": {
            // NOTE(review): most of this case was missing from the text this
            // was rebuilt from. Reconstructed as: consume inline HTML up to,
            // but not including, the next PHP open tag; fail when an open tag
            // starts right at yypos. Confirm against the intended behavior.
            while (y < input.length) {
                if (input[y] == '<' && atOpenTag(y)) {
                    break;
                }
                y++;
            }
            if (y == start) {
                return false;
            }
            yypos = y;
            return true;
        }

        case "QUOTED_STRING":
            return matchQuoted(s, "\"");
        case "SQUOTED_STRING":
            return matchQuoted(s, "'");
        case "BQUOTED_STRING":
            return matchQuoted(s, "`");

        case "HEREDOC_BLOCK": {
            // <<<{TABS_AND_SPACES}{LABEL}{NEWLINE} ... {NEWLINE}{LABEL}(";")?{NEWLINE}
            if (!stateTest(sc, "<<<")) {
                return false;
            }
            if (!stateTestRe(sc, "TABS_AND_SPACES")) {
                yypos = start;
                return false;
            }
            int labelStart = yypos;
            if (!stateTestRe(sc, "LABEL")) {
                yypos = start;
                return false;
            }
            char[] hlabel = input[labelStart .. yypos];
            int bodyStart = yypos;
            if (!stateTestRe(sc, "NEWLINE")) {
                yypos = start;
                return false;
            }
            // Re-read the opening newline inside the loop so that a heredoc
            // with an empty body (closing label on the very next line) ends.
            yypos = bodyStart;
            while (yypos < input.length) {
                int nl = yypos;
                if (!stateTestRe(sc, "NEWLINE")) {
                    yypos++;
                    continue;
                }
                if (!stateTest(sc, hlabel)) {
                    yypos = nl + 1;
                    continue;
                }
                stateTest(sc, ";");     // optional semicolon
                if (!stateTestRe(sc, "NEWLINE")) {
                    yypos = nl + 1;
                    continue;
                }
                return true;
            }
            yypos = start;
            return false;
        }

        case "COMMENT": {
            // block comment; an unterminated one runs to end of input
            if (!stateTest(sc, "/*")) {
                return false;
            }
            while (yypos < input.length) {
                if (stateTest(sc, "*/")) {
                    return true;
                }
                yypos++;
            }
            return true;
        }

        case "ONE_LINE_COMMENT": {
            if (!stateTest(sc, "//") && !stateTest(sc, "#")) {
                return false;
            }
            while (yypos < input.length) {
                // a close tag ends the comment but is not part of it
                if (stateTest(sc, "?>") || stateTest(sc, "%>")) {
                    yypos -= 2;
                    return true;
                }
                if (stateTestRe(sc, "NEWLINE")) {
                    return true;
                }
                yypos++;
            }
            return true;
        }

        default:
            writefln("unknown Regex", rege);
            exit(1);
            return false;
        }
    }

    /**
     * Returns the id of the next token and advances yypos past it, or 0 when
     * the input is exhausted or nothing matches. States other than INITIAL and
     * ST_IN_SCRIPTING are not implemented and exit the process.
     */
    int parseNext()
    {
        if (isEof() && (
                isState(states.ST_DOUBLE_QUOTES) ||
                isState(states.ST_BACKQUOTE) ||
                isState(states.INITIAL) ||
                isState(states.ST_IN_SCRIPTING) ||
                isState(states.ST_LOOKING_FOR_PROPERTY))) {
            return 0;
        }
        if (isEof() && (isState(states.ST_COMMENT) || isState(states.ST_DOC_COMMENT))) {
            writefln("Unterminated comment starting line");
            exit(0);
            return 0;
        }
        switch (this.currentState) {
        case states.INITIAL:
            return this.parseNextInitial();
        case states.ST_IN_SCRIPTING:
            return this.parseNextInScripting();
        default:
            writefln("Case Not Handled Yet:", this.currentState);
            exit(1);
        }
        return 0;
    }

    /**
     * Tokenizes outside PHP tags: a run of inline HTML, or an open tag that
     * switches the parser to ST_IN_SCRIPTING.
     *
     * NOTE(review): parts of this method were missing from the text it was
     * rebuilt from. The echo tags are tested before STANDARD_START here
     * because STANDARD_START also accepts the bare "<?" prefix.
     */
    int parseNextInitial()
    {
        if (stateTestRe(states.INITIAL, "NOT_START")) {
            return T_INLINE_HTML;
        }
        if (stateTest(states.INITIAL, "<%=") || stateTest(states.INITIAL, "<?=")) {
            pushState(states.ST_IN_SCRIPTING);
            return T_OPEN_TAG_WITH_ECHO;
        }
        if (stateTestRe(states.INITIAL, "STANDARD_START")) {
            pushState(states.ST_IN_SCRIPTING);
            return T_OPEN_TAG;
        }
        if (stateTest(states.INITIAL, "<%")) {
            pushState(states.ST_IN_SCRIPTING);
            return T_OPEN_TAG;
        }
        return 0;
    }

    int[char[]] matchTokens;

    /** Fills matchTokens with every operator, punctuation mark and keyword that is matched literally. */
    void loadMatchTokens()
    {
        // multi-character operators
        matchTokens["=>"] = T_DOUBLE_ARROW;
        matchTokens["++"] = T_INC;
        matchTokens["--"] = T_DEC;
        matchTokens["==="] = T_IS_IDENTICAL;
        matchTokens["!=="] = T_IS_NOT_IDENTICAL;
        matchTokens["=="] = T_IS_EQUAL;
        matchTokens["!="] = T_IS_NOT_EQUAL;
        matchTokens["<>"] = T_IS_NOT_EQUAL;
        matchTokens["<="] = T_IS_SMALLER_OR_EQUAL;
        matchTokens[">="] = T_IS_GREATER_OR_EQUAL;
        matchTokens["+="] = T_PLUS_EQUAL;
        matchTokens["-="] = T_MINUS_EQUAL;
        matchTokens["*="] = T_MUL_EQUAL;
        matchTokens["/="] = T_DIV_EQUAL;
        matchTokens[".="] = T_CONCAT_EQUAL;
        matchTokens["%="] = T_MOD_EQUAL;
        matchTokens["<<="] = T_SL_EQUAL;
        matchTokens[">>="] = T_SR_EQUAL;
        matchTokens["&="] = T_AND_EQUAL;
        matchTokens["|="] = T_OR_EQUAL;
        matchTokens["^="] = T_XOR_EQUAL;
        matchTokens["||"] = T_BOOLEAN_OR;
        matchTokens["&&"] = T_BOOLEAN_AND;
        matchTokens["<<"] = T_SL;
        matchTokens[">>"] = T_SR;
        matchTokens["::"] = T_PAAMAYIM_NEKUDOTAYIM;
        matchTokens["->"] = T_OBJECT_OPERATOR;
        matchTokens["?>"] = T_CLOSE_TAG;
        matchTokens["%>"] = T_CLOSE_TAG;

        // single-character tokens use their own character code as the id
        char[] singles = ";:,.[]()|^&+-/*=%!~$<>?@{}";
        for (int k = 0; k < singles.length; k++) {
            matchTokens[singles[k .. k + 1]] = cast(int) singles[k];
        }

        // keywords
        matchTokens["exit"] = T_EXIT;
        matchTokens["die"] = T_EXIT;
        matchTokens["function"] = T_FUNCTION;
        matchTokens["const"] = T_CONST;
        matchTokens["return"] = T_RETURN;
        matchTokens["try"] = T_TRY;
        matchTokens["catch"] = T_CATCH;
        matchTokens["throw"] = T_THROW;
        matchTokens["if"] = T_IF;
        matchTokens["elseif"] = T_ELSEIF;
        matchTokens["endif"] = T_ENDIF;
        matchTokens["else"] = T_ELSE;
        matchTokens["while"] = T_WHILE;
        matchTokens["endwhile"] = T_ENDWHILE;
        matchTokens["do"] = T_DO;
        matchTokens["for"] = T_FOR;
        matchTokens["endfor"] = T_ENDFOR;
        matchTokens["foreach"] = T_FOREACH;
        matchTokens["endforeach"] = T_ENDFOREACH;
        matchTokens["declare"] = T_DECLARE;
        matchTokens["enddeclare"] = T_ENDDECLARE;
        matchTokens["instanceof"] = T_INSTANCEOF;
        matchTokens["as"] = T_AS;
        matchTokens["switch"] = T_SWITCH;
        matchTokens["endswitch"] = T_ENDSWITCH;
        matchTokens["case"] = T_CASE;
        matchTokens["default"] = T_DEFAULT;
        matchTokens["break"] = T_BREAK;
        matchTokens["continue"] = T_CONTINUE;
        matchTokens["echo"] = T_ECHO;
        matchTokens["print"] = T_PRINT;
        matchTokens["class"] = T_CLASS;
        matchTokens["interface"] = T_INTERFACE;
        matchTokens["extends"] = T_EXTENDS;
        matchTokens["implements"] = T_IMPLEMENTS;
        matchTokens["new"] = T_NEW;
        matchTokens["clone"] = T_CLONE;
        matchTokens["var"] = T_VAR;
        matchTokens["eval"] = T_EVAL;
        matchTokens["include"] = T_INCLUDE;
        matchTokens["include_once"] = T_INCLUDE_ONCE;
        matchTokens["require"] = T_REQUIRE;
        matchTokens["require_once"] = T_REQUIRE_ONCE;
        matchTokens["use"] = T_USE;
        matchTokens["global"] = T_GLOBAL;
        matchTokens["isset"] = T_ISSET;
        matchTokens["empty"] = T_EMPTY;
        matchTokens["__halt_compiler"] = T_HALT_COMPILER;
        matchTokens["static"] = T_STATIC;
        matchTokens["abstract"] = T_ABSTRACT;
        matchTokens["final"] = T_FINAL;
        matchTokens["private"] = T_PRIVATE;
        matchTokens["protected"] = T_PROTECTED;
        matchTokens["public"] = T_PUBLIC;
        matchTokens["unset"] = T_UNSET;
        matchTokens["list"] = T_LIST;
        matchTokens["array"] = T_ARRAY;
        matchTokens["OR"] = T_LOGICAL_OR;
        matchTokens["AND"] = T_LOGICAL_AND;
        matchTokens["XOR"] = T_LOGICAL_XOR;
        // the lower-case spellings are the common ones in PHP source
        matchTokens["or"] = T_LOGICAL_OR;
        matchTokens["and"] = T_LOGICAL_AND;
        matchTokens["xor"] = T_LOGICAL_XOR;
        matchTokens["__CLASS__"] = T_CLASS_C;
        matchTokens["__FUNCTION__"] = T_FUNC_C;
        matchTokens["__METHOD__"] = T_METHOD_C;
        matchTokens["__LINE__"] = T_LINE;
        matchTokens["__FILE__"] = T_FILE;
    }

    /**
     * Tokenizes inside PHP tags by longest match: every pattern is tried at
     * the current position and the longest one wins. Literal tokens from
     * matchTokens also win ties, which is how a keyword beats the equally long
     * LABEL match.
     *
     * Returns: the winning token id with yypos moved past the token, or 0
     * (yypos unchanged) when nothing matches.
     */
    int parseNextInScripting()
    {
        states sc = states.ST_IN_SCRIPTING;
        int start = yypos;
        int best = 0;
        int bestLen = 0;
        int bestEnd = 0;

        debug(reallyminor) printf("Remaining String: %s\n", toStringz(input[yypos .. input.length]));

        // Records a candidate if it beats the best so far, then rewinds so the
        // next pattern starts from the same position.
        void offer(bool matched, int tok, bool tieWins)
        {
            if (matched) {
                int len = yypos - start;
                if (len > bestLen || (tieWins && len >= bestLen)) {
                    best = tok;
                    bestLen = len;
                    bestEnd = yypos;
                }
            }
            yypos = start;
        }

        // A cast is "(" inner ")". Requiring the parentheses keeps names such
        // as array_sum or int_val from being split by the cast patterns.
        void offerCast(char[] inner, int tok)
        {
            bool ok = stateTest(sc, "(") && stateTestRe(sc, inner) && stateTest(sc, ")");
            offer(ok, tok, false);
        }

        offer(stateTestRe(sc, "LABEL"), T_STRING, false);

        offerCast("TABS_AND_SPACES(int|integer)TABS_AND_SPACES", T_INT_CAST);
        offerCast("TABS_AND_SPACESstringTABS_AND_SPACES", T_STRING_CAST);
        offerCast("TABS_AND_SPACESarrayTABS_AND_SPACES", T_ARRAY_CAST);
        offerCast("TABS_AND_SPACESobjectTABS_AND_SPACES", T_OBJECT_CAST);
        offerCast("TABS_AND_SPACESbool|booleanTABS_AND_SPACES", T_BOOL_CAST);
        offerCast("TABS_AND_SPACESunsetTABS_AND_SPACES", T_UNSET_CAST);

        foreach (mtoken, nret; matchTokens) {
            offer(stateTest(sc, mtoken), nret, true);
        }

        offer(stateTestRe(sc, "LNUM"), T_LNUMBER, false);
        offer(stateTestRe(sc, "HNUM"), T_LNUMBER, false);
        offer(stateTestRe(sc, "DNUM"), T_DNUMBER, false);
        offer(stateTestRe(sc, "EXPONENT_DNUM"), T_DNUMBER, false);
        offer(stateTestRe(sc, "$LABEL"), T_VARIABLE, false);
        offer(stateTestRe(sc, "WHITESPACE"), T_WHITESPACE, false);
        offer(stateTestRe(sc, "COMMENT"), T_COMMENT, false);
        offer(stateTestRe(sc, "ONE_LINE_COMMENT"), T_ONE_LINE_COMMENT, false);

        // script-style close tag: "</script" {WHITESPACE}* ">"
        bool scriptClose = false;
        if (stateTest(sc, "</script")) {
            stateTestRe(sc, "WHITESPACE");  // eat white space..
            scriptClose = stateTest(sc, ">");
        }
        offer(scriptClose, T_CLOSE_TAG, false);

        offer(stateTestRe(sc, "QUOTED_STRING"), T_CONSTANT_ENCAPSED_STRING, false);
        offer(stateTestRe(sc, "SQUOTED_STRING"), T_CONSTANT_ENCAPSED_STRING, false);
        offer(stateTestRe(sc, "BQUOTED_STRING"), cast(int) '`', false);
        // should really preserve the HEREDOC...label..
        offer(stateTestRe(sc, "HEREDOC_BLOCK"), T_START_HEREDOC, false);

        if (!best) {
            return 0;
        }
        yypos = bestEnd;

        switch (best) {
        case T_OBJECT_OPERATOR:
            // swallow the property name so it is never read as a keyword
            stateTestRe(sc, "LABEL");
            break;
        case T_CLOSE_TAG:
            // leave scripting; popping keeps the state stack from growing
            popState();
            break;
        default:
            break;
        }
        return best;
    }
}
/* void main1() { auto x = new test(""); x.stateTest(test.states.INITIAL, "vvasdfdsssssssssssssssssssssssssssss"); x.stateTest(test.states.INITIAL, ""); debug(minor) writefln("isEOF=", x.isEof()); { auto y = new test("hello_world();"); writefln("TEST LABEL", cast(int) y.stateTestRe(test.states.INITIAL, "LABEL")); writefln("now @", y.input[y.yypos]); } { auto y = new test("$hello_world();"); writefln("TEST $LABEL", cast(int) y.stateTestRe(test.states.INITIAL, "$LABEL")); writefln("now @", y.input[y.yypos]); } { auto y = new test("090909;"); writefln("TEST LNUM", cast(int) y.stateTestRe(test.states.INITIAL, "LNUM")); writefln("now @", y.input[y.yypos]); } { auto y = new test("12123123.;"); writefln("TEST DNUM", cast(int) y.stateTestRe(test.states.INITIAL, "DNUM")); writefln("now @", y.input[y.yypos]); } { auto y = new test(".12123123.;"); writefln("TEST DNUM", cast(int) y.stateTestRe(test.states.INITIAL, "DNUM")); writefln("now @", y.input[y.yypos]); } { auto y = new test("34343.12123123;"); writefln("TEST DNUM", cast(int) y.stateTestRe(test.states.INITIAL, "DNUM")); writefln("now @", y.input[y.yypos]); } { auto y = new test("34343.12123123E343;"); writefln("EXPONENT_DNUM", cast(int) y.stateTestRe(test.states.INITIAL, "EXPONENT_DNUM")); writefln("now @", y.input[y.yypos]); } { auto y = new test("34343E343;"); writefln("EXPONENT_DNUM", cast(int) y.stateTestRe(test.states.INITIAL, "EXPONENT_DNUM")); writefln("now @", y.input[y.yypos]); } { auto y = new test("34343E-343;"); writefln("EXPONENT_DNUM", cast(int) y.stateTestRe(test.states.INITIAL, "EXPONENT_DNUM")); writefln("now @", y.input[y.yypos]); } { auto y = new 
test("34343E;"); writefln("EXPONENT_DNUM", cast(int) y.stateTestRe(test.states.INITIAL, "EXPONENT_DNUM")); writefln("now @", y.input[y.yypos]); } { auto y = new test("34343E+343;"); writefln("EXPONENT_DNUM", cast(int) y.stateTestRe(test.states.INITIAL, "EXPONENT_DNUM")); writefln("now @", y.input[y.yypos]); } { auto y = new test("0x3fc;"); writefln("HNUM", cast(int) y.stateTestRe(test.states.INITIAL, "HNUM")); writefln("now @", y.input[y.yypos]); } { auto y = new test("0x3fcG;"); writefln("HNUM", cast(int) y.stateTestRe(test.states.INITIAL, "HNUM")); writefln("now @", y.input[y.yypos]); } { auto y = new test("0x3fcv;"); writefln("HNUM", cast(int) y.stateTestRe(test.states.INITIAL, "HNUM")); writefln("now @", y.input[y.yypos]); } { auto y = new test("\n\n\nxxxx;"); writefln("WHITESPACE", cast(int) y.stateTestRe(test.states.INITIAL, "WHITESPACE")); writefln("now @", y.input[y.yypos]); } { auto y = new test(" \t \txxxx;"); writefln("TABS_AND_SPACES", cast(int) y.stateTestRe(test.states.INITIAL, "TABS_AND_SPACES")); writefln("now @", y.input[y.yypos]); } { auto y = new test("[]{}$"); for (bool ret = true; ret; ret = y.stateTestRe(test.states.INITIAL, "ENCAPSED_TOKENS") ) { writefln("ENCAPSED_TOKENS now @POS:", y.yypos, "RET=", cast(int)ret); } } { auto y = new test("\n\t\r #'.:;,ccc()|^&+-/*=%!~<>?@"); bool ret = y.stateTestRe(test.states.INITIAL, "ESCAPED_AND_WHITESPACE"); writefln("ESCAPED_AND_WHITESPACE now @POS:", y.yypos, "RET=", cast(int)ret); ret = y.stateTestRe(test.states.INITIAL, "LABEL"); writefln("ESCAPED_AND_WHITESPACE now @POS:", y.yypos, "RET=", cast(int)ret); ret = y.stateTestRe(test.states.INITIAL, "ESCAPED_AND_WHITESPACE"); writefln("ESCAPED_AND_WHITESPACE now @POS:", y.yypos, "RET=", cast(int)ret); } { auto y = new test("\n\t6343"); for (bool ret = true; ret; ret = y.stateTestRe(test.states.INITIAL, "ANY_CHAR") ) { writefln("ANY_CHAR now @POS:", y.yypos, "RET=", cast(int)ret); } } { auto y = new test("\r\n\n\r"); for (bool ret = true; ret; ret = 
y.stateTestRe(test.states.INITIAL, "NEWLINE") ) { writefln("NEWLINE now @POS:", y.yypos, "RET=", cast(int)ret); } } //"TABS_AND_SPACES(int|integer)TABS_AND_SPACES": { auto y = new test("\t int xx"); for (bool ret = true; ret; ret = y.stateTestRe(test.states.INITIAL, "TABS_AND_SPACES(int|integer)TABS_AND_SPACES") ) { writefln("TABS_AND_SPACES(int|integer)TABS_AND_SPACES now @POS:", y.yypos, "RET=", cast(int)ret); } } { auto y = new test("\t integer xx"); for (bool ret = true; ret; ret = y.stateTestRe(test.states.INITIAL, "TABS_AND_SPACES(int|integer)TABS_AND_SPACES") ) { writefln("TABS_AND_SPACES(int|integer)TABS_AND_SPACES now @POS:", y.yypos, "RET=", cast(int)ret); } } { auto y = new test(""); for (bool ret = true; ret; ret = y.stateTestRe(test.states.INITIAL, "STANDARD_START") ) { writefln("STANDARD_START", y.yypos, "RET=", cast(int)ret); } } { auto y = new test("asdfasdfasdfvvvvv