/**
 * Hand-written tokenizer for PHP source text, modelled on the start-condition
 * driven Zend language scanner.
 *
 * The constructor scans the whole input eagerly and stores the result in
 * `tokens`. Scanning is driven by a stack of lexer states (`states`); each
 * call to `parseNext` consumes one token at `yypos` and returns its id, which
 * is either one of the `T_*` constants or the character code of a
 * single-character token such as ';' or '{'.
 *
 * NOTE(review): the draft this replaces did not compile and several of its
 * rules (the open-tag patterns, NOT_START, the script-tag forms) had been
 * destroyed by markup stripping. Those rules are reconstructed from the
 * surrounding comments and from the Zend scanner; confirm them against the
 * intended PHP version. Script-tag open/close forms are not supported.
 */
class PhpTokenizer
{
    /// Lexer start conditions; names follow the Zend scanner.
    enum states
    {
        INITIAL,
        ST_IN_SCRIPTING,
        ST_DOUBLE_QUOTES,
        ST_SINGLE_QUOTE,
        ST_BACKQUOTE,
        ST_HEREDOC,
        ST_LOOKING_FOR_PROPERTY,
        ST_LOOKING_FOR_VARNAME,
        ST_COMMENT,
        ST_DOC_COMMENT,
        ST_ONE_LINE_COMMENT
    }

    /// Token ids. Values below 256 are reserved for single-character tokens.
    enum : int
    {
        T_REQUIRE_ONCE = 258, T_REQUIRE = 259, T_EVAL = 260, T_INCLUDE_ONCE = 261,
        T_INCLUDE = 262, T_LOGICAL_OR = 263, T_LOGICAL_XOR = 264, T_LOGICAL_AND = 265,
        T_PRINT = 266, T_SR_EQUAL = 267, T_SL_EQUAL = 268, T_XOR_EQUAL = 269,
        T_OR_EQUAL = 270, T_AND_EQUAL = 271, T_MOD_EQUAL = 272, T_CONCAT_EQUAL = 273,
        T_DIV_EQUAL = 274, T_MUL_EQUAL = 275, T_MINUS_EQUAL = 276, T_PLUS_EQUAL = 277,
        T_BOOLEAN_OR = 278, T_BOOLEAN_AND = 279, T_IS_NOT_IDENTICAL = 280,
        T_IS_IDENTICAL = 281, T_IS_NOT_EQUAL = 282, T_IS_EQUAL = 283,
        T_IS_GREATER_OR_EQUAL = 284, T_IS_SMALLER_OR_EQUAL = 285, T_SR = 286, T_SL = 287,
        T_INSTANCEOF = 288, T_UNSET_CAST = 289, T_BOOL_CAST = 290, T_OBJECT_CAST = 291,
        T_ARRAY_CAST = 292, T_STRING_CAST = 293, T_DOUBLE_CAST = 294, T_INT_CAST = 295,
        T_DEC = 296, T_INC = 297, T_CLONE = 298, T_NEW = 299, T_EXIT = 300,
        T_IF = 301, T_ELSEIF = 302, T_ELSE = 303, T_ENDIF = 304, T_LNUMBER = 305,
        T_DNUMBER = 306, T_STRING = 307, T_STRING_VARNAME = 308, T_VARIABLE = 309,
        T_NUM_STRING = 310, T_INLINE_HTML = 311, T_CHARACTER = 312, T_BAD_CHARACTER = 313,
        T_ENCAPSED_AND_WHITESPACE = 314, T_CONSTANT_ENCAPSED_STRING = 315, T_ECHO = 316,
        T_DO = 317, T_WHILE = 318, T_ENDWHILE = 319, T_FOR = 320, T_ENDFOR = 321,
        T_FOREACH = 322, T_ENDFOREACH = 323, T_DECLARE = 324, T_ENDDECLARE = 325,
        T_AS = 326, T_SWITCH = 327, T_ENDSWITCH = 328, T_CASE = 329, T_DEFAULT = 330,
        T_BREAK = 331, T_CONTINUE = 332, T_FUNCTION = 333, T_CONST = 334, T_RETURN = 335,
        T_TRY = 336, T_CATCH = 337, T_THROW = 338, T_USE = 339, T_GLOBAL = 340,
        T_PUBLIC = 341, T_PROTECTED = 342, T_PRIVATE = 343, T_FINAL = 344,
        T_ABSTRACT = 345, T_STATIC = 346, T_VAR = 347, T_UNSET = 348, T_ISSET = 349,
        T_EMPTY = 350, T_HALT_COMPILER = 351, T_CLASS = 352, T_INTERFACE = 353,
        T_EXTENDS = 354, T_IMPLEMENTS = 355, T_OBJECT_OPERATOR = 356,
        T_DOUBLE_ARROW = 357, T_LIST = 358, T_ARRAY = 359, T_CLASS_C = 360,
        T_METHOD_C = 361, T_FUNC_C = 362, T_LINE = 363, T_FILE = 364, T_COMMENT = 365,
        T_DOC_COMMENT = 366, T_OPEN_TAG = 367, T_OPEN_TAG_WITH_ECHO = 368,
        T_CLOSE_TAG = 369, T_WHITESPACE = 370, T_START_HEREDOC = 371,
        T_END_HEREDOC = 372, T_DOLLAR_OPEN_CURLY_BRACES = 373, T_CURLY_OPEN = 374,
        T_PAAMAYIM_NEKUDOTAYIM = 375
    }

    /// One scanned token.
    struct token
    {
        int t;       /// token id; an int because T_* ids do not fit in a char
        string str;  /// exact source text of the token (a slice of `input`)
        int line;    /// 0-based line on which the token starts
    }

    states currentState;   /// always equal to the top of `stateStack`
    states[] stateStack;   /// start-condition stack, innermost state last
    string input;          /// text being scanned
    int yypos;             /// offset of the next unread character
    int yystart;           /// offset at which the token being built starts
    int line;              /// 0-based line number at `yypos`
    token[] tokens;        /// every token produced by the constructor, in order

    private alias S = states;

    /// Internal result of a scanner step that changed state without producing a token.
    private enum int RESCAN = -1;

    /// Label that terminates the heredoc currently being scanned, if any.
    /// NOTE(review): a heredoc opened inside `{$ ... }` within another heredoc
    /// overwrites this; nested heredocs are not tracked.
    private string heredocLabel;

    /**
     * Tokenizes `inp` completely; the result is available in `tokens`.
     *
     * Params:
     *   inp = PHP source text, starting in inline-HTML mode
     */
    this(string inp)
    {
        input = inp;
        pushState(S.INITIAL);
        yypos = 0;
        yystart = 0;
        line = 0;

        int startLine = 0;
        int id;
        while ((id = parseNext()) != 0)
        {
            token tok;
            tok.t = id;
            tok.str = input[yystart .. yypos];
            tok.line = startLine;
            debug
            {
                import std.stdio : writeln;
                writeln("got token: ", id, ", VAL:", tok.str, " @ line:", tok.line);
            }
            tokens ~= tok;
            yystart = yypos;
            startLine = line;
        }
    }

    /**
     * Matches the literal `str` at the current position.
     *
     * Params:
     *   s   = state the rule belongs to; the rule is skipped in any other state
     *   str = exact text to match
     * Returns: true when the state matched and `str` was consumed.
     */
    bool stateTest(states s, string str)
    {
        if (s != currentState || !startsWith(str))
        {
            return false;
        }
        advance(cast(int) str.length);
        return true;
    }

    /// Enters `s`, remembering the current state so `popState` can return to it.
    void pushState(states s)
    {
        stateStack ~= s;
        currentState = s;
    }

    /// Returns to the state that was active before the matching `pushState`.
    /// The outermost state is never removed.
    void popState()
    {
        if (stateStack.length > 1)
        {
            stateStack = stateStack[0 .. $ - 1];
        }
        currentState = stateStack.length ? stateStack[$ - 1] : S.INITIAL;
    }

    /// True when `s` is the active state.
    bool isState(states s)
    {
        return currentState == s;
    }

    /// True when every character of `input` has been consumed.
    bool isEof()
    {
        return yypos >= input.length;
    }

    /**
     * Scans the next token starting at `yypos`.
     *
     * Rules that only switch state (comment openers, the look-ahead states
     * after "->" and "${") produce no token by themselves, so scanning loops
     * until a token is complete.
     *
     * Returns: a `T_*` id, the character code of a single-character token,
     *          or 0 at end of input.
     */
    int parseNext()
    {
        for (;;)
        {
            // A comment opener may be the last thing in the input; the comment
            // scanners must still run so that text becomes a token.
            const bool inComment = isState(S.ST_COMMENT) || isState(S.ST_DOC_COMMENT)
                || isState(S.ST_ONE_LINE_COMMENT);
            if (isEof() && !inComment)
            {
                return 0;
            }

            int id;
            final switch (currentState)
            {
            case S.INITIAL:
                id = scanInitial();
                break;
            case S.ST_IN_SCRIPTING:
                id = scanScripting();
                break;
            case S.ST_DOUBLE_QUOTES, S.ST_BACKQUOTE, S.ST_HEREDOC:
                id = scanEncapsed();
                break;
            case S.ST_SINGLE_QUOTE:
                id = scanSingleQuote();
                break;
            case S.ST_LOOKING_FOR_PROPERTY:
                id = scanProperty();
                break;
            case S.ST_LOOKING_FOR_VARNAME:
                id = scanVarname();
                break;
            case S.ST_COMMENT, S.ST_DOC_COMMENT:
                id = scanBlockComment();
                break;
            case S.ST_ONE_LINE_COMMENT:
                id = scanLineComment();
                break;
            }
            if (id != RESCAN)
            {
                return id;
            }
        }
    }

    /**
     * Matches one of the named scanner patterns at the current position.
     *
     * Supported names: LABEL, $LABEL, LNUM, DNUM, EXPONENT_DNUM, HNUM,
     * WHITESPACE, TABS_AND_SPACES, TOKENS, ENCAPSED_TOKENS (alias
     * ENCAPSED_TOKEN), ESCAPED_AND_WHITESPACE, ANY_CHAR, NEWLINE,
     * STANDARD_START, NOT_START, END_SCRIPT, DOC_COMMENT_START, QUOTED_STRING,
     * SQUOTED_STRING, SQUOTE_CONTENTS, ESCAPED_CHAR, ESCAPED_HCHAR,
     * ESCAPED_ANY_CHAR and the six "TABS_AND_SPACES...TABS_AND_SPACES" cast
     * names. Unknown names never match.
     *
     * Params:
     *   s    = state the rule belongs to; the rule is skipped in any other state
     *   rege = pattern name
     * Returns: true when the state matched and the pattern was consumed
     *          (TABS_AND_SPACES may consume nothing and still succeed).
     */
    bool stateTestRe(states s, string rege)
    {
        if (currentState != s)
        {
            return false;
        }
        const int len = matchLen(rege, yypos);
        if (len < 0)
        {
            return false;
        }
        advance(len);
        return true;
    }

    // ------------------------------------------------------------------
    // per-state scanners
    // ------------------------------------------------------------------

    /// Inline HTML: everything up to the next open tag is one T_INLINE_HTML.
    private int scanInitial()
    {
        // Echo tags first, otherwise the short-tag rule would claim their prefix.
        if (stateTest(S.INITIAL, "<?=") || stateTest(S.INITIAL, "<%="))
        {
            beginState(S.ST_IN_SCRIPTING);
            return T_OPEN_TAG_WITH_ECHO;
        }
        if (stateTestRe(S.INITIAL, "STANDARD_START") || stateTest(S.INITIAL, "<%"))
        {
            beginState(S.ST_IN_SCRIPTING);
            return T_OPEN_TAG;
        }
        if (stateTestRe(S.INITIAL, "NOT_START"))
        {
            return T_INLINE_HTML;
        }
        return unexpectedChar();
    }

    /// PHP code. Rules are ordered so that the longest match wins.
    private int scanScripting()
    {
        const int start = yypos;

        if (stateTestRe(S.ST_IN_SCRIPTING, "WHITESPACE"))
        {
            return T_WHITESPACE;
        }

        // Comment openers only switch state; the comment scanners finish the token.
        if (stateTest(S.ST_IN_SCRIPTING, "#") || stateTest(S.ST_IN_SCRIPTING, "//"))
        {
            pushState(S.ST_ONE_LINE_COMMENT);
            return RESCAN;
        }
        if (stateTestRe(S.ST_IN_SCRIPTING, "DOC_COMMENT_START"))
        {
            pushState(S.ST_DOC_COMMENT);
            return RESCAN;
        }
        if (stateTest(S.ST_IN_SCRIPTING, "/*"))
        {
            pushState(S.ST_COMMENT);
            return RESCAN;
        }

        // Close tags must be tried before '?', '%' and "%=" are seen as operators.
        if (stateTestRe(S.ST_IN_SCRIPTING, "END_SCRIPT"))
        {
            beginState(S.INITIAL);
            return T_CLOSE_TAG;
        }
        if (stateTest(S.ST_IN_SCRIPTING, "%>"))
        {
            stateTestRe(S.ST_IN_SCRIPTING, "NEWLINE");   // optional trailing line break
            beginState(S.INITIAL);
            return T_CLOSE_TAG;
        }

        // Casts must be tried before '(' is returned as a plain token.
        if (stateTestRe(S.ST_IN_SCRIPTING, "TABS_AND_SPACES(int|integer)TABS_AND_SPACES"))
        {
            return T_INT_CAST;
        }
        if (stateTestRe(S.ST_IN_SCRIPTING, "TABS_AND_SPACESstringTABS_AND_SPACES"))
        {
            return T_STRING_CAST;
        }
        if (stateTestRe(S.ST_IN_SCRIPTING, "TABS_AND_SPACESarrayTABS_AND_SPACES"))
        {
            return T_ARRAY_CAST;
        }
        if (stateTestRe(S.ST_IN_SCRIPTING, "TABS_AND_SPACESobjectTABS_AND_SPACES"))
        {
            return T_OBJECT_CAST;
        }
        if (stateTestRe(S.ST_IN_SCRIPTING, "TABS_AND_SPACESbool|booleanTABS_AND_SPACES"))
        {
            return T_BOOL_CAST;
        }
        if (stateTestRe(S.ST_IN_SCRIPTING, "TABS_AND_SPACESunsetTABS_AND_SPACES"))
        {
            return T_UNSET_CAST;
        }

        if (scanHeredocStart())
        {
            return T_START_HEREDOC;
        }

        // Numbers, most specific form first so "1.5e3" is not split at "1".
        // NOTE(review): integer literals too large for a long are still
        // reported as T_LNUMBER; the Zend scanner promotes them to T_DNUMBER.
        if (stateTestRe(S.ST_IN_SCRIPTING, "EXPONENT_DNUM")
            || stateTestRe(S.ST_IN_SCRIPTING, "DNUM"))
        {
            return T_DNUMBER;
        }
        if (stateTestRe(S.ST_IN_SCRIPTING, "HNUM") || stateTestRe(S.ST_IN_SCRIPTING, "LNUM"))
        {
            return T_LNUMBER;
        }

        if (stateTestRe(S.ST_IN_SCRIPTING, "$LABEL"))
        {
            return T_VARIABLE;
        }
        // Whole identifiers are looked up, so "format" is never seen as "for".
        if (stateTestRe(S.ST_IN_SCRIPTING, "LABEL"))
        {
            return keywordToken(asciiLower(input[start .. yypos]));
        }

        // Strings without interpolation are a single constant token.
        if (stateTestRe(S.ST_IN_SCRIPTING, "QUOTED_STRING")
            || stateTestRe(S.ST_IN_SCRIPTING, "SQUOTED_STRING"))
        {
            return T_CONSTANT_ENCAPSED_STRING;
        }
        if (stateTest(S.ST_IN_SCRIPTING, "\""))
        {
            beginState(S.ST_DOUBLE_QUOTES);
            return '"';
        }
        if (stateTest(S.ST_IN_SCRIPTING, "`"))
        {
            beginState(S.ST_BACKQUOTE);
            return '`';
        }
        if (stateTest(S.ST_IN_SCRIPTING, "'"))
        {
            beginState(S.ST_SINGLE_QUOTE);
            return '\'';
        }

        // Multi-character operators, three characters before two.
        for (int n = 3; n >= 2; n--)
        {
            if (yypos + n > input.length)
            {
                continue;
            }
            const int op = operatorToken(input[yypos .. yypos + n]);
            if (op == 0)
            {
                continue;
            }
            advance(n);
            if (op == T_OBJECT_OPERATOR)
            {
                pushState(S.ST_LOOKING_FOR_PROPERTY);
            }
            return op;
        }

        // Braces nest scripting states so "{$expr}" inside a string can return to it.
        if (stateTest(S.ST_IN_SCRIPTING, "{"))
        {
            pushState(S.ST_IN_SCRIPTING);
            return '{';
        }
        if (stateTest(S.ST_IN_SCRIPTING, "}"))
        {
            if (stateStack.length > 1)
            {
                popState();
            }
            return '}';
        }

        if (stateTestRe(S.ST_IN_SCRIPTING, "TOKENS"))
        {
            return input[yypos - 1];
        }
        return unexpectedChar();
    }

    /// Contents of a double-quoted string, backquoted string or heredoc.
    private int scanEncapsed()
    {
        const states s = currentState;

        if (s == S.ST_HEREDOC && scanHeredocEnd())
        {
            return T_END_HEREDOC;
        }
        if (stateTest(s, "->"))
        {
            pushState(S.ST_LOOKING_FOR_PROPERTY);
            return T_OBJECT_OPERATOR;
        }
        if (stateTest(s, "${"))
        {
            pushState(S.ST_LOOKING_FOR_VARNAME);
            return T_DOLLAR_OPEN_CURLY_BRACES;
        }
        if (startsWith("{$"))
        {
            advance(1);   // only the brace; '$' is re-read as part of the variable
            pushState(S.ST_IN_SCRIPTING);
            return T_CURLY_OPEN;
        }
        if (stateTestRe(s, "$LABEL"))
        {
            return T_VARIABLE;
        }
        if (input[yypos] == '$')
        {
            advance(1);   // a '$' that does not start a variable
            return T_CHARACTER;
        }
        if (stateTestRe(s, "HNUM") || stateTestRe(s, "LNUM"))
        {
            return T_NUM_STRING;
        }
        if (stateTestRe(s, "LABEL"))
        {
            return T_STRING;
        }
        if (stateTest(s, "\\{"))
        {
            return T_STRING;
        }
        if (stateTestRe(s, "ESCAPED_HCHAR") || stateTestRe(s, "ESCAPED_CHAR")
            || stateTestRe(s, "ESCAPED_ANY_CHAR"))
        {
            return T_CHARACTER;
        }

        const char c = input[yypos];
        if (s == S.ST_DOUBLE_QUOTES && c == '"')
        {
            advance(1);
            beginState(S.ST_IN_SCRIPTING);
            return '"';
        }
        if (s == S.ST_BACKQUOTE && c == '`')
        {
            advance(1);
            beginState(S.ST_IN_SCRIPTING);
            return '`';
        }
        // Quote characters that do not terminate the current construct are plain text.
        if (c == '"' || c == '`' || (s == S.ST_HEREDOC && c == '\''))
        {
            advance(s == S.ST_HEREDOC ? span(yypos, &isQuote) : 1);
            return T_ENCAPSED_AND_WHITESPACE;
        }
        if (stateTestRe(s, "ESCAPED_AND_WHITESPACE"))
        {
            return T_ENCAPSED_AND_WHITESPACE;
        }
        if (stateTestRe(s, "ENCAPSED_TOKENS"))
        {
            return input[yypos - 1];
        }
        return unexpectedChar();
    }

    /// Contents of a single-quoted string that could not be taken as one constant token.
    private int scanSingleQuote()
    {
        if (stateTest(S.ST_SINGLE_QUOTE, "\\'") || stateTest(S.ST_SINGLE_QUOTE, "\\\\"))
        {
            return T_CHARACTER;
        }
        if (stateTestRe(S.ST_SINGLE_QUOTE, "SQUOTE_CONTENTS"))
        {
            return T_ENCAPSED_AND_WHITESPACE;
        }
        if (stateTest(S.ST_SINGLE_QUOTE, "'"))
        {
            beginState(S.ST_IN_SCRIPTING);
            return '\'';
        }
        return unexpectedChar();   // a lone backslash at end of input
    }

    /// After "->": an identifier is a property name even when it spells a keyword.
    private int scanProperty()
    {
        const bool named = stateTestRe(S.ST_LOOKING_FOR_PROPERTY, "LABEL");
        leaveState();
        return named ? T_STRING : RESCAN;   // anything else is re-read by the outer state
    }

    /// After "${": an identifier is a variable name; either way scripting follows.
    private int scanVarname()
    {
        const bool named = stateTestRe(S.ST_LOOKING_FOR_VARNAME, "LABEL");
        leaveState();
        pushState(S.ST_IN_SCRIPTING);
        return named ? T_STRING_VARNAME : RESCAN;
    }

    /// Rest of a "#" or "//" comment: up to and including the line break, or
    /// up to (not including) a close tag.
    private int scanLineComment()
    {
        const int end = cast(int) input.length;
        int p = yypos;
        while (p < end)
        {
            const char c = input[p];
            if (c == '\n' || c == '\r')
            {
                p += matchLen("NEWLINE", p);
                break;
            }
            if ((c == '?' || c == '%') && p + 1 < end && input[p + 1] == '>')
            {
                break;
            }
            p++;
        }
        advance(p - yypos);
        leaveState();
        return T_COMMENT;
    }

    /// Rest of a block or doc comment, through the closing delimiter. An
    /// unterminated comment runs to end of input and is still returned.
    private int scanBlockComment()
    {
        const int id = isState(S.ST_DOC_COMMENT) ? T_DOC_COMMENT : T_COMMENT;
        const int end = cast(int) input.length;
        int p = yypos;
        while (p + 1 < end && !(input[p] == '*' && input[p + 1] == '/'))
        {
            p++;
        }
        if (p + 1 < end)
        {
            p += 2;
        }
        else
        {
            import std.stdio : stderr;
            stderr.writeln("Unterminated comment at end of input");
            p = end;
        }
        advance(p - yypos);
        leaveState();
        return id;
    }

    /// "<<<" blanks LABEL NEWLINE. Remembers the label and enters ST_HEREDOC.
    private bool scanHeredocStart()
    {
        if (!startsWith("<<<"))
        {
            return false;
        }
        int p = yypos + 3;
        p += span(p, &isBlank);
        const int labelLen = matchLen("LABEL", p);
        if (labelLen < 0)
        {
            return false;
        }
        const int breakLen = matchLen("NEWLINE", p + labelLen);
        if (breakLen < 0)
        {
            return false;
        }
        heredocLabel = input[p .. p + labelLen];
        advance(p + labelLen + breakLen - yypos);
        beginState(S.ST_HEREDOC);
        return true;
    }

    /// The heredoc label at the start of a line, followed by an optional ';'
    /// and a line break (or end of input). Only the label is consumed.
    private bool scanHeredocEnd()
    {
        if (heredocLabel.length == 0)
        {
            return false;
        }
        const bool atLineStart = yypos == 0 || input[yypos - 1] == '\n' || input[yypos - 1] == '\r';
        if (!atLineStart || !startsWith(heredocLabel))
        {
            return false;
        }
        const int end = cast(int) input.length;
        const int afterLabel = yypos + cast(int) heredocLabel.length;
        if (afterLabel < end && isLabelChar(input[afterLabel]))
        {
            return false;   // a longer identifier that merely begins with the label
        }
        int p = afterLabel;
        if (p < end && input[p] == ';')
        {
            p++;
        }
        if (p < end && matchLen("NEWLINE", p) < 0)
        {
            return false;
        }
        advance(afterLabel - yypos);
        heredocLabel = null;
        beginState(S.ST_IN_SCRIPTING);
        return true;
    }

    /// Reports and skips one character no rule accepts, so scanning always progresses.
    private int unexpectedChar()
    {
        import std.stdio : stderr;
        stderr.writefln("Unexpected character in input (ASCII=%d) state=%d line=%d",
            cast(int) input[yypos], cast(int) currentState, line);
        advance(1);
        return T_BAD_CHARACTER;
    }

    // ------------------------------------------------------------------
    // state and position helpers
    // ------------------------------------------------------------------

    /// Replaces the active state without growing the stack.
    private void beginState(states s)
    {
        if (stateStack.length == 0)
        {
            pushState(s);
            return;
        }
        stateStack[$ - 1] = s;
        currentState = s;
    }

    /// Leaves a transient state. With nothing to return to, scripting is
    /// entered instead so a rescan cannot loop in the same state.
    private void leaveState()
    {
        if (stateStack.length > 1)
        {
            popState();
        }
        else
        {
            beginState(S.ST_IN_SCRIPTING);
        }
    }

    /// True when `str` occurs at `yypos`.
    private bool startsWith(string str)
    {
        return yypos + str.length <= input.length && input[yypos .. yypos + str.length] == str;
    }

    /// Consumes `n` characters, keeping `line` in step with the '\n' characters passed.
    private void advance(int n)
    {
        foreach (char c; input[yypos .. yypos + n])
        {
            if (c == '\n')
            {
                line++;
            }
        }
        yypos += n;
    }

    /// Number of consecutive characters from `pos` accepted by `pred`.
    private int span(int pos, bool function(char) pred)
    {
        int p = pos;
        while (p < input.length && pred(input[p]))
        {
            p++;
        }
        return p - pos;
    }

    // ------------------------------------------------------------------
    // pattern matching
    // ------------------------------------------------------------------

    /// Length of the named pattern at `pos`, or -1 when it does not match there.
    private int matchLen(string rege, int pos)
    {
        const int end = cast(int) input.length;
        if (pos < 0 || pos > end)
        {
            return -1;
        }
        const bool more = pos < end;

        switch (rege)
        {
        case "LABEL":   // [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
            if (!more || !isLabelStart(input[pos]))
            {
                return -1;
            }
            return 1 + span(pos + 1, &isLabelChar);

        case "$LABEL":
        {
            if (!more || input[pos] != '$')
            {
                return -1;
            }
            const int len = matchLen("LABEL", pos + 1);
            return len < 0 ? -1 : len + 1;
        }

        case "LNUM":   // [0-9]+
            return nonEmpty(span(pos, &isDigit));

        case "DNUM":   // [0-9]*\.[0-9]+ | [0-9]+\.[0-9]*
        {
            const int whole = span(pos, &isDigit);
            const int dot = pos + whole;
            if (dot >= end || input[dot] != '.')
            {
                return -1;
            }
            const int frac = span(dot + 1, &isDigit);
            return whole + frac == 0 ? -1 : whole + 1 + frac;
        }

        case "EXPONENT_DNUM":   // (LNUM|DNUM)[eE][+-]?LNUM
        {
            int mantissa = matchLen("DNUM", pos);
            if (mantissa < 0)
            {
                mantissa = matchLen("LNUM", pos);
            }
            if (mantissa < 0)
            {
                return -1;
            }
            int p = pos + mantissa;
            if (p >= end || (input[p] != 'e' && input[p] != 'E'))
            {
                return -1;
            }
            p++;
            if (p < end && (input[p] == '+' || input[p] == '-'))
            {
                p++;
            }
            const int digits = span(p, &isDigit);
            return digits == 0 ? -1 : p + digits - pos;
        }

        case "HNUM":   // "0x"[0-9a-fA-F]+
        {
            if (end - pos < 3 || input[pos] != '0' || input[pos + 1] != 'x')
            {
                return -1;
            }
            const int digits = span(pos + 2, &isHex);
            return digits == 0 ? -1 : digits + 2;
        }

        case "WHITESPACE":   // [ \n\r\t]+
            return nonEmpty(span(pos, &isSpace));

        case "TABS_AND_SPACES":   // [ \t]*  (may be empty)
            return span(pos, &isBlank);

        case "TOKENS":
            return more && isIn(input[pos], ";:,.[]()|^&+-/*=%!~$<>?@") ? 1 : -1;

        case "ENCAPSED_TOKENS", "ENCAPSED_TOKEN":
            return more && isIn(input[pos], "[]{}$") ? 1 : -1;

        case "ESCAPED_AND_WHITESPACE":
            return nonEmpty(span(pos, &isEncapsedFiller));

        case "ANY_CHAR":
            return more ? 1 : -1;

        case "NEWLINE":   // "\r\n" | "\n" | "\r"
            if (!more)
            {
                return -1;
            }
            if (input[pos] == '\r')
            {
                return pos + 1 < end && input[pos + 1] == '\n' ? 2 : 1;
            }
            return input[pos] == '\n' ? 1 : -1;

        case "TABS_AND_SPACES(int|integer)TABS_AND_SPACES":
            return castLen(pos, "int", "integer");
        case "TABS_AND_SPACESstringTABS_AND_SPACES":
            return castLen(pos, "string");
        case "TABS_AND_SPACESarrayTABS_AND_SPACES":
            return castLen(pos, "array");
        case "TABS_AND_SPACESobjectTABS_AND_SPACES":
            return castLen(pos, "object");
        case "TABS_AND_SPACESbool|booleanTABS_AND_SPACES":
            return castLen(pos, "bool", "boolean");
        case "TABS_AND_SPACESunsetTABS_AND_SPACES":
            return castLen(pos, "unset");

        case "STANDARD_START":
        {
            // NOTE(review): reconstructed. "<?php" plus one blank or line break
            // (or end of input) is the long tag; otherwise "<?" is a short tag.
            // Echo tags must be tested by the caller before this pattern.
            if (end - pos < 2 || input[pos] != '<' || input[pos + 1] != '?')
            {
                return -1;
            }
            if (end - pos >= 5 && asciiLower(input[pos + 2 .. pos + 5]) == "php")
            {
                const int after = pos + 5;
                if (after == end)
                {
                    return 5;
                }
                if (isBlank(input[after]))
                {
                    return 6;
                }
                const int breakLen = matchLen("NEWLINE", after);
                if (breakLen > 0)
                {
                    return 5 + breakLen;
                }
            }
            return 2;
        }

        case "NOT_START":
        {
            // NOTE(review): reconstructed. Inline HTML runs to the next "<?" or
            // "<%"; the scanner-buffer limit of 400 characters is not kept.
            int p = pos;
            while (p < end)
            {
                if (input[p] == '<' && p + 1 < end && (input[p + 1] == '?' || input[p + 1] == '%'))
                {
                    break;
                }
                p++;
            }
            return nonEmpty(p - pos);
        }

        case "END_SCRIPT":   // "?>" NEWLINE?
        {
            if (end - pos < 2 || input[pos] != '?' || input[pos + 1] != '>')
            {
                return -1;
            }
            const int breakLen = matchLen("NEWLINE", pos + 2);
            return 2 + (breakLen > 0 ? breakLen : 0);
        }

        case "DOC_COMMENT_START":   // "/**" WHITESPACE
        {
            if (end - pos < 4 || input[pos .. pos + 3] != "/**")
            {
                return -1;
            }
            const int blanks = span(pos + 3, &isSpace);
            return blanks == 0 ? -1 : 3 + blanks;
        }

        case "QUOTED_STRING":    // double-quoted, no unescaped '$'
            return quotedLen(pos, '"', false);
        case "SQUOTED_STRING":   // single-quoted
            return quotedLen(pos, '\'', true);

        case "SQUOTE_CONTENTS":   // ([^'\\]|\\[^'\\])+
        {
            int p = pos;
            while (p < end && input[p] != '\'')
            {
                if (input[p] == '\\')
                {
                    if (p + 1 >= end || input[p + 1] == '\'' || input[p + 1] == '\\')
                    {
                        break;
                    }
                    p++;
                }
                p++;
            }
            return nonEmpty(p - pos);
        }

        case "ESCAPED_CHAR":   // "\\"[0-7]{1,3}
        {
            if (!more || input[pos] != '\\')
            {
                return -1;
            }
            int digits = 0;
            while (digits < 3 && pos + 1 + digits < end && isOctal(input[pos + 1 + digits]))
            {
                digits++;
            }
            return digits == 0 ? -1 : digits + 1;
        }

        case "ESCAPED_HCHAR":   // "\\x"[0-9A-Fa-f]{1,2}
        {
            if (end - pos < 3 || input[pos] != '\\' || input[pos + 1] != 'x')
            {
                return -1;
            }
            int digits = 0;
            while (digits < 2 && pos + 2 + digits < end && isHex(input[pos + 2 + digits]))
            {
                digits++;
            }
            return digits == 0 ? -1 : digits + 2;
        }

        case "ESCAPED_ANY_CHAR":   // "\\" ANY_CHAR
            return end - pos >= 2 && input[pos] == '\\' ? 2 : -1;

        default:
            return -1;
        }
    }

    /// Length of "(" blanks NAME blanks ")" at `pos` when NAME (ASCII
    /// case-insensitive) is one of `names`, else -1.
    private int castLen(int pos, string[] names...)
    {
        const int end = cast(int) input.length;
        if (pos >= end || input[pos] != '(')
        {
            return -1;
        }
        int p = pos + 1;
        p += span(p, &isBlank);
        const int wordLen = span(p, &isLabelChar);
        if (wordLen == 0)
        {
            return -1;
        }
        const string word = asciiLower(input[p .. p + wordLen]);
        bool known = false;
        foreach (name; names)
        {
            if (name == word)
            {
                known = true;
                break;
            }
        }
        if (!known)
        {
            return -1;
        }
        p += wordLen;
        p += span(p, &isBlank);
        if (p >= end || input[p] != ')')
        {
            return -1;
        }
        return p + 1 - pos;
    }

    /// Length of a complete string delimited by `quote` at `pos`, honouring
    /// backslash escapes; -1 when unterminated or, unless `allowDollar`, when
    /// it contains an unescaped '$'.
    private int quotedLen(int pos, char quote, bool allowDollar)
    {
        const int end = cast(int) input.length;
        if (pos >= end || input[pos] != quote)
        {
            return -1;
        }
        int p = pos + 1;
        while (p < end)
        {
            const char c = input[p];
            if (c == quote)
            {
                return p + 1 - pos;
            }
            if (c == '$' && !allowDollar)
            {
                return -1;
            }
            p += c == '\\' ? 2 : 1;
        }
        return -1;
    }

    /// Token id for a lower-cased identifier: its keyword id, or T_STRING.
    private static int keywordToken(string word)
    {
        switch (word)
        {
        case "exit", "die":     return T_EXIT;
        case "function":        return T_FUNCTION;
        case "const":           return T_CONST;
        case "return":          return T_RETURN;
        case "try":             return T_TRY;
        case "catch":           return T_CATCH;
        case "throw":           return T_THROW;
        case "if":              return T_IF;
        case "elseif":          return T_ELSEIF;
        case "endif":           return T_ENDIF;
        case "else":            return T_ELSE;
        case "while":           return T_WHILE;
        case "endwhile":        return T_ENDWHILE;
        case "do":              return T_DO;
        case "for":             return T_FOR;
        case "endfor":          return T_ENDFOR;
        case "foreach":         return T_FOREACH;
        case "endforeach":      return T_ENDFOREACH;
        case "declare":         return T_DECLARE;
        case "enddeclare":      return T_ENDDECLARE;
        case "instanceof":      return T_INSTANCEOF;
        case "as":              return T_AS;
        case "switch":          return T_SWITCH;
        case "endswitch":       return T_ENDSWITCH;
        case "case":            return T_CASE;
        case "default":         return T_DEFAULT;
        case "break":           return T_BREAK;
        case "continue":        return T_CONTINUE;
        case "echo":            return T_ECHO;
        case "print":           return T_PRINT;
        case "class":           return T_CLASS;
        case "interface":       return T_INTERFACE;
        case "extends":         return T_EXTENDS;
        case "implements":      return T_IMPLEMENTS;
        case "new":             return T_NEW;
        case "clone":           return T_CLONE;
        case "var":             return T_VAR;
        case "eval":            return T_EVAL;
        case "include":         return T_INCLUDE;
        case "include_once":    return T_INCLUDE_ONCE;
        case "require":         return T_REQUIRE;
        case "require_once":    return T_REQUIRE_ONCE;
        case "use":             return T_USE;
        case "global":          return T_GLOBAL;
        case "isset":           return T_ISSET;
        case "empty":           return T_EMPTY;
        case "__halt_compiler": return T_HALT_COMPILER;
        case "static":          return T_STATIC;
        case "abstract":        return T_ABSTRACT;
        case "final":           return T_FINAL;
        case "private":         return T_PRIVATE;
        case "protected":       return T_PROTECTED;
        case "public":          return T_PUBLIC;
        case "unset":           return T_UNSET;
        case "list":            return T_LIST;
        case "array":           return T_ARRAY;
        case "or":              return T_LOGICAL_OR;
        case "and":             return T_LOGICAL_AND;
        case "xor":             return T_LOGICAL_XOR;
        case "__class__":       return T_CLASS_C;
        case "__function__":    return T_FUNC_C;
        case "__method__":      return T_METHOD_C;
        case "__line__":        return T_LINE;
        case "__file__":        return T_FILE;
        default:                return T_STRING;
        }
    }

    /// Token id for a two- or three-character operator, or 0 when `op` is not one.
    private static int operatorToken(string op)
    {
        switch (op)
        {
        case "===":      return T_IS_IDENTICAL;
        case "!==":      return T_IS_NOT_IDENTICAL;
        case "<<=":      return T_SL_EQUAL;
        case ">>=":      return T_SR_EQUAL;
        case "->":       return T_OBJECT_OPERATOR;
        case "::":       return T_PAAMAYIM_NEKUDOTAYIM;
        case "=>":       return T_DOUBLE_ARROW;
        case "++":       return T_INC;
        case "--":       return T_DEC;
        case "==":       return T_IS_EQUAL;
        case "!=", "<>": return T_IS_NOT_EQUAL;
        case "<=":       return T_IS_SMALLER_OR_EQUAL;
        case ">=":       return T_IS_GREATER_OR_EQUAL;
        case "+=":       return T_PLUS_EQUAL;
        case "-=":       return T_MINUS_EQUAL;
        case "*=":       return T_MUL_EQUAL;
        case "/=":       return T_DIV_EQUAL;
        case ".=":       return T_CONCAT_EQUAL;
        case "%=":       return T_MOD_EQUAL;
        case "&=":       return T_AND_EQUAL;
        case "|=":       return T_OR_EQUAL;
        case "^=":       return T_XOR_EQUAL;
        case "||":       return T_BOOLEAN_OR;
        case "&&":       return T_BOOLEAN_AND;
        case "<<":       return T_SL;
        case ">>":       return T_SR;
        default:         return 0;
        }
    }

    // ------------------------------------------------------------------
    // character classes
    // ------------------------------------------------------------------

    private static int nonEmpty(int len) { return len > 0 ? len : -1; }

    private static bool isDigit(char c) { return c >= '0' && c <= '9'; }

    private static bool isOctal(char c) { return c >= '0' && c <= '7'; }

    private static bool isHex(char c)
    {
        return isDigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
    }

    private static bool isLabelStart(char c)
    {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || c >= 0x7f;
    }

    private static bool isLabelChar(char c) { return isLabelStart(c) || isDigit(c); }

    private static bool isBlank(char c) { return c == ' ' || c == '\t'; }

    private static bool isSpace(char c) { return isBlank(c) || c == '\n' || c == '\r'; }

    private static bool isQuote(char c) { return c == '"' || c == '\'' || c == '`'; }

    private static bool isEncapsedFiller(char c)
    {
        return isIn(c, "\n\t\r #'.:;,()|^&+-/*=%!~<>?@");
    }

    private static bool isIn(char c, string set)
    {
        foreach (char member; set)
        {
            if (member == c)
            {
                return true;
            }
        }
        return false;
    }

    /// Copy of `s` with ASCII upper-case letters folded to lower case.
    private static string asciiLower(string s)
    {
        char[] folded = s.dup;
        foreach (ref char c; folded)
        {
            if (c >= 'A' && c <= 'Z')
            {
                c += 'a' - 'A';
            }
        }
        return folded.idup;
    }
}