class PhpTokenizer
{
/// Start condition the scanner is currently in.
states currentState;
/// Stack of start conditions. pushState() appends to it with `~=`, so it has
/// to be a dynamic array rather than an int-keyed associative array.
states[] stateStack;
/// Complete source text being tokenized.
String input;
/// Index into `input` of the next unread character.
int yypos;
/// Index into `input` at which the token currently being scanned starts.
int yystart;
/// Newline counter; stateTestRe() increments it as `this.lines`.
// NOTE(review): no declaration of this counter is visible in this part of the
// file - drop this one if the class already declares it further down.
int lines;

/// One scanned token.
struct token {
    int t;      // token id: a T_* constant (258..375) or a plain character code
    String str; // text the token was matched from
    int line;   // value of `lines` when scanning of the token began
}

/// Every token scanned by the constructor, in input order.
token[] tokens;

/**
 * Tokenizes `inp` completely.
 *
 * Starts in the INITIAL state and calls parseNext() until it returns 0,
 * appending each token to `tokens` and echoing it with writefln().
 *
 * Params:
 *   inp = the text to tokenize
 */
this(String inp)
{
    this.input = inp;
    pushState(INITIAL);
    yypos = 0;
    yystart = 0;
    int cline = 0;
    int c;
    while (0 != (c = this.parseNext())) {
        // Some stateTestRe() patterns set yypos one past the end of the input
        // when a match runs up to the last character; clamp before slicing.
        if (yypos > input.length) {
            yypos = cast(int) input.length;
        }
        token tok;
        tok.t = c;
        tok.str = input[yystart .. yypos];
        tok.line = cline;
        writefln("got token: ", c, ", VAL:", tok.str, " @ line:" ,tok.line);
        tokens ~= tok;
        yystart = yypos;
        cline = this.lines;
    }
}
// Start conditions of the scanner. parseNext() only tries a rule when the
// current state equals the state the rule is written for.
// NOTE(review): the members are referenced unqualified throughout this file
// (e.g. pushState(INITIAL)); confirm that resolves with a named enum.
enum states {
// State the constructor pushes before scanning starts.
INITIAL,
// State in which parseNext() matches keywords, operators and literals.
ST_IN_SCRIPTING,
// Pushed by parseNext() after a '"' seen in ST_IN_SCRIPTING.
ST_DOUBLE_QUOTES,
// Pushed by parseNext() after a "'" seen in ST_IN_SCRIPTING.
ST_SINGLE_QUOTE,
// Pushed by parseNext() after a '`' seen in ST_IN_SCRIPTING.
ST_BACKQUOTE,
// Pushed by parseNext() after a HEREDOC_START match.
ST_HEREDOC,
// Pushed by parseNext() after "->"; left again after the next match.
ST_LOOKING_FOR_PROPERTY,
// Pushed by parseNext() after "${" inside a quoted context.
ST_LOOKING_FOR_VARNAME,
// In the visible code only consulted by parseNext()'s end-of-input check.
ST_COMMENT,
// In the visible code only consulted by parseNext()'s end-of-input check.
ST_DOC_COMMENT,
// Not referenced in the visible part of the file.
ST_ONE_LINE_COMMENT
}
/**
 * Token ids returned by parseNext().
 *
 * Declared as an anonymous enum so each name is an int constant in class
 * scope. All values are above 255, so they cannot collide with the
 * single-character tokens that parseNext() returns as plain character codes.
 */
enum {
    T_REQUIRE_ONCE = 258,
    T_REQUIRE = 259,
    T_EVAL = 260,
    T_INCLUDE_ONCE = 261,
    T_INCLUDE = 262,
    T_LOGICAL_OR = 263,
    T_LOGICAL_XOR = 264,
    T_LOGICAL_AND = 265,
    T_PRINT = 266,
    T_SR_EQUAL = 267,
    T_SL_EQUAL = 268,
    T_XOR_EQUAL = 269,
    T_OR_EQUAL = 270,
    T_AND_EQUAL = 271,
    T_MOD_EQUAL = 272,
    T_CONCAT_EQUAL = 273,
    T_DIV_EQUAL = 274,
    T_MUL_EQUAL = 275,
    T_MINUS_EQUAL = 276,
    T_PLUS_EQUAL = 277,
    T_BOOLEAN_OR = 278,
    T_BOOLEAN_AND = 279,
    T_IS_NOT_IDENTICAL = 280,
    T_IS_IDENTICAL = 281,
    T_IS_NOT_EQUAL = 282,
    T_IS_EQUAL = 283,
    T_IS_GREATER_OR_EQUAL = 284,
    T_IS_SMALLER_OR_EQUAL = 285,
    T_SR = 286,
    T_SL = 287,
    T_INSTANCEOF = 288,
    T_UNSET_CAST = 289,
    T_BOOL_CAST = 290,
    T_OBJECT_CAST = 291,
    T_ARRAY_CAST = 292,
    T_STRING_CAST = 293,
    T_DOUBLE_CAST = 294,
    T_INT_CAST = 295,
    T_DEC = 296,
    T_INC = 297,
    T_CLONE = 298,
    T_NEW = 299,
    T_EXIT = 300,
    T_IF = 301,
    T_ELSEIF = 302,
    T_ELSE = 303,
    T_ENDIF = 304,
    T_LNUMBER = 305,
    T_DNUMBER = 306,
    T_STRING = 307,
    T_STRING_VARNAME = 308,
    T_VARIABLE = 309,
    T_NUM_STRING = 310,
    T_INLINE_HTML = 311,
    T_CHARACTER = 312,
    T_BAD_CHARACTER = 313,
    T_ENCAPSED_AND_WHITESPACE = 314,
    T_CONSTANT_ENCAPSED_STRING = 315,
    T_ECHO = 316,
    T_DO = 317,
    T_WHILE = 318,
    T_ENDWHILE = 319,
    T_FOR = 320,
    T_ENDFOR = 321,
    T_FOREACH = 322,
    T_ENDFOREACH = 323,
    T_DECLARE = 324,
    T_ENDDECLARE = 325,
    T_AS = 326,
    T_SWITCH = 327,
    T_ENDSWITCH = 328,
    T_CASE = 329,
    T_DEFAULT = 330,
    T_BREAK = 331,
    T_CONTINUE = 332,
    T_FUNCTION = 333,
    T_CONST = 334,
    T_RETURN = 335,
    T_TRY = 336,
    T_CATCH = 337,
    T_THROW = 338,
    T_USE = 339,
    T_GLOBAL = 340,
    T_PUBLIC = 341,
    T_PROTECTED = 342,
    T_PRIVATE = 343,
    T_FINAL = 344,
    T_ABSTRACT = 345,
    T_STATIC = 346,
    T_VAR = 347,
    T_UNSET = 348,
    T_ISSET = 349,
    T_EMPTY = 350,
    T_HALT_COMPILER = 351,
    T_CLASS = 352,
    T_INTERFACE = 353,
    T_EXTENDS = 354,
    T_IMPLEMENTS = 355,
    T_OBJECT_OPERATOR = 356,
    T_DOUBLE_ARROW = 357,
    T_LIST = 358,
    T_ARRAY = 359,
    T_CLASS_C = 360,
    T_METHOD_C = 361,
    T_FUNC_C = 362,
    T_LINE = 363,
    T_FILE = 364,
    T_COMMENT = 365,
    T_DOC_COMMENT = 366,
    T_OPEN_TAG = 367,
    T_OPEN_TAG_WITH_ECHO = 368,
    T_CLOSE_TAG = 369,
    T_WHITESPACE = 370,
    T_START_HEREDOC = 371,
    T_END_HEREDOC = 372,
    T_DOLLAR_OPEN_CURLY_BRACES = 373,
    T_CURLY_OPEN = 374,
    T_PAAMAYIM_NEKUDOTAYIM = 375
}
/**
 * Tries to match the literal `str` at the current position while the scanner
 * is in state `s`.
 *
 * Params:
 *   s   = start condition the rule belongs to
 *   str = exact text to look for
 * Returns: true, with yypos advanced past the text, when the scanner is in
 *          state `s` and the input continues with `str`; false, with yypos
 *          untouched, otherwise.
 */
boolean stateTest(states s, String str)
{
    if (s != currentState) {
        return false;
    }
    // The slice has to end at yypos + length, not at the bare length.
    int stop = yypos + cast(int) str.length;
    // Too little input left: no match possible, and slicing would be out of range.
    if (yypos < 0 || stop > input.length) {
        return false;
    }
    if (input[yypos .. stop] != str) {
        return false;
    }
    yypos = stop;
    return true;
}
/**
 * Enters start condition `s`, remembering it on top of stateStack so that
 * popState() can later leave it again.
 *
 * Params:
 *   s = start condition to enter (a member of the `states` enum)
 */
void pushState(states s)
{
    stateStack ~= s;
    this.currentState = s;
}
/**
 * Leaves the innermost start condition and makes the one below it current.
 *
 * pushState() keeps the current state on top of stateStack, so popping means
 * dropping that top entry and re-reading the new top. The bottom entry is
 * never removed; an unbalanced pop leaves the scanner in its base state
 * instead of indexing past the end of the stack.
 */
void popState()
{
    if (stateStack.length > 1) {
        stateStack.length = stateStack.length - 1;
    }
    if (stateStack.length > 0) {
        this.currentState = stateStack[stateStack.length - 1];
    }
}
/**
 * Reports whether the scanner is currently in start condition `s`.
 *
 * Params:
 *   s = start condition to compare against
 * Returns: true when `s` equals currentState.
 */
boolean isState(states s)
{
    return this.currentState == s;
}
/**
 * Reports whether all input has been consumed.
 *
 * yypos is the index of the next unread character, so the input is already
 * exhausted when yypos equals input.length; input[yypos] would be out of
 * range at that point.
 *
 * Returns: true when there is no character left to read.
 */
boolean isEof()
{
    return yypos >= input.length;
}
/**
 * Replaces the innermost start condition with `s` (the flex BEGIN operation).
 *
 * Unlike pushState() the nesting depth does not change, so a later popState()
 * for a closing '}' returns to the state that surrounded the matching '{'.
 *
 * Params:
 *   s = start condition to switch to
 */
private void beginState(states s)
{
    if (stateStack.length == 0) {
        pushState(s);
        return;
    }
    stateStack[stateStack.length - 1] = s;
    this.currentState = s;
}

/**
 * Matches a parenthesised type cast such as "( int )" in ST_IN_SCRIPTING.
 *
 * Params:
 *   typeRe = one of the "TABS_AND_SPACES...TABS_AND_SPACES" pattern names
 *            understood by stateTestRe(); those match the type name and the
 *            blanks around it but not the parentheses
 * Returns: true with yypos past the ')' on a match; false with yypos
 *          restored otherwise.
 */
private boolean castTest(String typeRe)
{
    int saved = yypos;
    if (stateTest(ST_IN_SCRIPTING, "(")
            && stateTestRe(ST_IN_SCRIPTING, typeRe)
            && stateTest(ST_IN_SCRIPTING, ")")) {
        return true;
    }
    yypos = saved;
    return false;
}

/**
 * Scans one token starting at yypos.
 *
 * Rules are tried top to bottom and the first match wins. Wherever one
 * literal is a prefix of another ("for"/"foreach", "<<"/"<<<", "{"/"{$")
 * the longer one is therefore listed first, and the single-character TOKENS
 * rule comes after every multi-character ST_IN_SCRIPTING rule.
 *
 * Returns: a T_* token id, the character code of a single-character token,
 *          or 0 at end of input. After a match yypos is just past the token.
 *
 * NOTE(review): there are still no rules for whitespace, comments, variables,
 * bare identifiers or inline HTML, and keywords are matched without checking
 * that no identifier character follows. The token values (zendlval in the
 * Zend scanner this was ported from) are not computed; the caller slices the
 * token text out of `input` instead.
 */
int parseNext()
{
    // No rule can match once the input is exhausted, and several patterns
    // read input[yypos] unguarded, so end of input is handled first.
    // TODO: warn about an unterminated comment when this happens in
    // ST_COMMENT or ST_DOC_COMMENT.
    if (yypos >= input.length) {
        return 0;
    }

    // Start of the token; stands in for flex's yytext (input[start]) and
    // yyless(n) (yypos = start + n).
    int start = yypos;

    // ---- one-shot states -------------------------------------------------
    // Tried first so that, when they give up and pop, every rule of the
    // restored state below still gets its chance on the same character.
    if (stateTestRe(ST_LOOKING_FOR_PROPERTY, "LABEL")) {
        popState();
        return T_STRING;
    }
    if (stateTestRe(ST_LOOKING_FOR_PROPERTY, "ANY_CHAR")) {
        yypos = start; // yyless(0): the character belongs to the outer state
        popState();
    }
    if (stateTestRe(ST_LOOKING_FOR_VARNAME, "LABEL")) {
        popState();
        pushState(ST_IN_SCRIPTING);
        return T_STRING_VARNAME;
    }
    if (stateTestRe(ST_LOOKING_FOR_VARNAME, "ANY_CHAR")) {
        yypos = start; // yyless(0)
        popState();
        pushState(ST_IN_SCRIPTING);
    }

    // ---- opening tag -----------------------------------------------------
    // NOTE(review): the original rule returned T_CLOSE_TAG and stayed in
    // INITIAL, which looks like the open-tag and close-tag rules merged by
    // accident; confirm against the STANDARD_START pattern.
    if (stateTestRe(INITIAL, "STANDARD_START")) {
        beginState(ST_IN_SCRIPTING);
        return T_OPEN_TAG;
    }

    // ---- keywords --------------------------------------------------------
    if (stateTest(ST_IN_SCRIPTING, "exit")) { return T_EXIT; }
    if (stateTest(ST_IN_SCRIPTING, "die")) { return T_EXIT; }
    if (stateTest(ST_IN_SCRIPTING, "function")) { return T_FUNCTION; }
    if (stateTest(ST_IN_SCRIPTING, "const")) { return T_CONST; }
    if (stateTest(ST_IN_SCRIPTING, "return")) { return T_RETURN; }
    if (stateTest(ST_IN_SCRIPTING, "try")) { return T_TRY; }
    if (stateTest(ST_IN_SCRIPTING, "catch")) { return T_CATCH; }
    if (stateTest(ST_IN_SCRIPTING, "throw")) { return T_THROW; }
    if (stateTest(ST_IN_SCRIPTING, "if")) { return T_IF; }
    if (stateTest(ST_IN_SCRIPTING, "elseif")) { return T_ELSEIF; }
    if (stateTest(ST_IN_SCRIPTING, "endif")) { return T_ENDIF; }
    if (stateTest(ST_IN_SCRIPTING, "else")) { return T_ELSE; }
    if (stateTest(ST_IN_SCRIPTING, "while")) { return T_WHILE; }
    if (stateTest(ST_IN_SCRIPTING, "endwhile")) { return T_ENDWHILE; }
    if (stateTest(ST_IN_SCRIPTING, "do")) { return T_DO; }
    // "foreach"/"endforeach" before "for"/"endfor": the short forms are prefixes.
    if (stateTest(ST_IN_SCRIPTING, "foreach")) { return T_FOREACH; }
    if (stateTest(ST_IN_SCRIPTING, "for")) { return T_FOR; }
    if (stateTest(ST_IN_SCRIPTING, "endforeach")) { return T_ENDFOREACH; }
    if (stateTest(ST_IN_SCRIPTING, "endfor")) { return T_ENDFOR; }
    if (stateTest(ST_IN_SCRIPTING, "declare")) { return T_DECLARE; }
    if (stateTest(ST_IN_SCRIPTING, "enddeclare")) { return T_ENDDECLARE; }
    if (stateTest(ST_IN_SCRIPTING, "instanceof")) { return T_INSTANCEOF; }
    if (stateTest(ST_IN_SCRIPTING, "as")) { return T_AS; }
    if (stateTest(ST_IN_SCRIPTING, "switch")) { return T_SWITCH; }
    if (stateTest(ST_IN_SCRIPTING, "endswitch")) { return T_ENDSWITCH; }
    if (stateTest(ST_IN_SCRIPTING, "case")) { return T_CASE; }
    if (stateTest(ST_IN_SCRIPTING, "default")) { return T_DEFAULT; }
    if (stateTest(ST_IN_SCRIPTING, "break")) { return T_BREAK; }
    if (stateTest(ST_IN_SCRIPTING, "continue")) { return T_CONTINUE; }
    if (stateTest(ST_IN_SCRIPTING, "echo")) { return T_ECHO; }
    if (stateTest(ST_IN_SCRIPTING, "print")) { return T_PRINT; }
    if (stateTest(ST_IN_SCRIPTING, "class")) { return T_CLASS; }
    if (stateTest(ST_IN_SCRIPTING, "interface")) { return T_INTERFACE; }
    if (stateTest(ST_IN_SCRIPTING, "extends")) { return T_EXTENDS; }
    if (stateTest(ST_IN_SCRIPTING, "implements")) { return T_IMPLEMENTS; }

    // ---- member access ---------------------------------------------------
    if (stateTest(ST_IN_SCRIPTING, "->") ||
        stateTest(ST_DOUBLE_QUOTES, "->") ||
        stateTest(ST_BACKQUOTE, "->") ||
        stateTest(ST_HEREDOC, "->")) {
        pushState(ST_LOOKING_FOR_PROPERTY);
        return T_OBJECT_OPERATOR;
    }
    if (stateTest(ST_IN_SCRIPTING, "::")) { return T_PAAMAYIM_NEKUDOTAYIM; }
    if (stateTest(ST_IN_SCRIPTING, "new")) { return T_NEW; }
    if (stateTest(ST_IN_SCRIPTING, "clone")) { return T_CLONE; }
    if (stateTest(ST_IN_SCRIPTING, "var")) { return T_VAR; }

    // ---- type casts ------------------------------------------------------
    // The patterns only cover the type name and surrounding blanks; without
    // the parentheses a bare "array" or "unset" would be taken for a cast.
    if (castTest("TABS_AND_SPACES(int|integer)TABS_AND_SPACES")) { return T_INT_CAST; }
    if (castTest("TABS_AND_SPACESstringTABS_AND_SPACES")) { return T_STRING_CAST; }
    if (castTest("TABS_AND_SPACESarrayTABS_AND_SPACES")) { return T_ARRAY_CAST; }
    if (castTest("TABS_AND_SPACESobjectTABS_AND_SPACES")) { return T_OBJECT_CAST; }
    if (castTest("TABS_AND_SPACESbool|booleanTABS_AND_SPACES")) { return T_BOOL_CAST; }
    if (castTest("TABS_AND_SPACESunsetTABS_AND_SPACES")) { return T_UNSET_CAST; }

    // ---- more keywords ---------------------------------------------------
    if (stateTest(ST_IN_SCRIPTING, "eval")) { return T_EVAL; }
    // "_once" variants first: the plain forms are prefixes of them.
    if (stateTest(ST_IN_SCRIPTING, "include_once")) { return T_INCLUDE_ONCE; }
    if (stateTest(ST_IN_SCRIPTING, "include")) { return T_INCLUDE; }
    if (stateTest(ST_IN_SCRIPTING, "require_once")) { return T_REQUIRE_ONCE; }
    if (stateTest(ST_IN_SCRIPTING, "require")) { return T_REQUIRE; }
    if (stateTest(ST_IN_SCRIPTING, "use")) { return T_USE; }
    if (stateTest(ST_IN_SCRIPTING, "global")) { return T_GLOBAL; }
    if (stateTest(ST_IN_SCRIPTING, "isset")) { return T_ISSET; }
    if (stateTest(ST_IN_SCRIPTING, "empty")) { return T_EMPTY; }
    if (stateTest(ST_IN_SCRIPTING, "__halt_compiler")) { return T_HALT_COMPILER; }
    if (stateTest(ST_IN_SCRIPTING, "static")) { return T_STATIC; }
    if (stateTest(ST_IN_SCRIPTING, "abstract")) { return T_ABSTRACT; }
    if (stateTest(ST_IN_SCRIPTING, "final")) { return T_FINAL; }
    if (stateTest(ST_IN_SCRIPTING, "private")) { return T_PRIVATE; }
    if (stateTest(ST_IN_SCRIPTING, "protected")) { return T_PROTECTED; }
    if (stateTest(ST_IN_SCRIPTING, "public")) { return T_PUBLIC; }
    if (stateTest(ST_IN_SCRIPTING, "unset")) { return T_UNSET; }
    if (stateTest(ST_IN_SCRIPTING, "=>")) { return T_DOUBLE_ARROW; }
    if (stateTest(ST_IN_SCRIPTING, "list")) { return T_LIST; }
    if (stateTest(ST_IN_SCRIPTING, "array")) { return T_ARRAY; }

    // ---- multi-character operators ---------------------------------------
    if (stateTest(ST_IN_SCRIPTING, "++")) { return T_INC; }
    if (stateTest(ST_IN_SCRIPTING, "--")) { return T_DEC; }
    if (stateTest(ST_IN_SCRIPTING, "===")) { return T_IS_IDENTICAL; }
    if (stateTest(ST_IN_SCRIPTING, "!==")) { return T_IS_NOT_IDENTICAL; }
    if (stateTest(ST_IN_SCRIPTING, "==")) { return T_IS_EQUAL; }
    if (stateTest(ST_IN_SCRIPTING, "!=") ||
        stateTest(ST_IN_SCRIPTING, "<>")) {
        return T_IS_NOT_EQUAL;
    }
    if (stateTest(ST_IN_SCRIPTING, "<=")) { return T_IS_SMALLER_OR_EQUAL; }
    if (stateTest(ST_IN_SCRIPTING, ">=")) { return T_IS_GREATER_OR_EQUAL; }
    if (stateTest(ST_IN_SCRIPTING, "+=")) { return T_PLUS_EQUAL; }
    if (stateTest(ST_IN_SCRIPTING, "-=")) { return T_MINUS_EQUAL; }
    if (stateTest(ST_IN_SCRIPTING, "*=")) { return T_MUL_EQUAL; }
    if (stateTest(ST_IN_SCRIPTING, "/=")) { return T_DIV_EQUAL; }
    if (stateTest(ST_IN_SCRIPTING, ".=")) { return T_CONCAT_EQUAL; }
    if (stateTest(ST_IN_SCRIPTING, "%=")) { return T_MOD_EQUAL; }
    // Heredoc opener before "<<=" and "<<", which are prefixes of "<<<".
    if (stateTestRe(ST_IN_SCRIPTING, "HEREDOC_START")) { // <<<{TABS_AND_SPACES}{LABEL}{NEWLINE}
        // TODO: remember the heredoc label and count the newline, as the
        // Zend scanner does, so HEREDOC_END can be checked against it.
        beginState(ST_HEREDOC);
        return T_START_HEREDOC;
    }
    if (stateTest(ST_IN_SCRIPTING, "<<=")) { return T_SL_EQUAL; }
    if (stateTest(ST_IN_SCRIPTING, ">>=")) { return T_SR_EQUAL; }
    if (stateTest(ST_IN_SCRIPTING, "&=")) { return T_AND_EQUAL; }
    if (stateTest(ST_IN_SCRIPTING, "|=")) { return T_OR_EQUAL; }
    if (stateTest(ST_IN_SCRIPTING, "^=")) { return T_XOR_EQUAL; }
    if (stateTest(ST_IN_SCRIPTING, "||")) { return T_BOOLEAN_OR; }
    if (stateTest(ST_IN_SCRIPTING, "&&")) { return T_BOOLEAN_AND; }
    if (stateTest(ST_IN_SCRIPTING, "OR")) { return T_LOGICAL_OR; }
    if (stateTest(ST_IN_SCRIPTING, "AND")) { return T_LOGICAL_AND; }
    if (stateTest(ST_IN_SCRIPTING, "XOR")) { return T_LOGICAL_XOR; }
    if (stateTest(ST_IN_SCRIPTING, "<<")) { return T_SL; }
    if (stateTest(ST_IN_SCRIPTING, ">>")) { return T_SR; }

    // ---- braces ----------------------------------------------------------
    if (stateTest(ST_IN_SCRIPTING, "{")) {
        pushState(ST_IN_SCRIPTING);
        return '{';
    }
    if (stateTest(ST_DOUBLE_QUOTES, "${") ||
        stateTest(ST_BACKQUOTE, "${") ||
        stateTest(ST_HEREDOC, "${")) {
        pushState(ST_LOOKING_FOR_VARNAME);
        return T_DOLLAR_OPEN_CURLY_BRACES;
    }
    if (stateTest(ST_IN_SCRIPTING, "}")) {
        popState();
        return '}';
    }

    // ---- numbers ---------------------------------------------------------
    // Hex and exponent forms first; a plain digit run is a prefix of both.
    if (stateTestRe(ST_IN_SCRIPTING, "HNUM")) {
        // TODO: range-check the value as the Zend scanner does.
        return T_LNUMBER;
    }
    if (stateTestRe(ST_IN_SCRIPTING, "EXPONENT_DNUM")) {
        return T_DNUMBER;
    }
    // DNUM also accepts a bare digit run, so try both patterns from the same
    // position and keep the longer match; a tie means there was no '.'.
    int intEnd = -1;
    int floatEnd = -1;
    if (stateTestRe(ST_IN_SCRIPTING, "LNUM")) {
        intEnd = yypos;
        yypos = start;
    }
    if (stateTestRe(ST_IN_SCRIPTING, "DNUM")) {
        floatEnd = yypos;
        yypos = start;
    }
    if (floatEnd > intEnd) {
        yypos = floatEnd;
        return T_DNUMBER;
    }
    if (intEnd >= 0) {
        // TODO: return T_DNUMBER when the value overflows an integer.
        yypos = intEnd;
        return T_LNUMBER;
    }
    if (stateTestRe(ST_DOUBLE_QUOTES, "HNUM") ||
        stateTestRe(ST_BACKQUOTE, "HNUM") ||
        stateTestRe(ST_HEREDOC, "HNUM") ||
        stateTestRe(ST_DOUBLE_QUOTES, "LNUM") ||
        stateTestRe(ST_BACKQUOTE, "LNUM") ||
        stateTestRe(ST_HEREDOC, "LNUM")) {
        return T_NUM_STRING;
    }

    // ---- magic constants -------------------------------------------------
    // TODO: the Zend scanner resolves these to the current class, function,
    // line and file; only the token id is produced here.
    if (stateTest(ST_IN_SCRIPTING, "__CLASS__")) { return T_CLASS_C; }
    if (stateTest(ST_IN_SCRIPTING, "__FUNCTION__")) { return T_FUNC_C; }
    if (stateTest(ST_IN_SCRIPTING, "__METHOD__")) { return T_METHOD_C; }
    if (stateTest(ST_IN_SCRIPTING, "__LINE__")) { return T_LINE; }
    if (stateTest(ST_IN_SCRIPTING, "__FILE__")) { return T_FILE; }

    // ---- closing tags ----------------------------------------------------
    // Before TOKENS, which would otherwise consume the '?' or '%' alone.
    // TODO: also swallow one optional trailing newline ({NEWLINE}?).
    // NOTE(review): the Zend scanner only honours "%>" when asp_tags is on.
    if (stateTest(ST_IN_SCRIPTING, "?>") ||
        stateTest(ST_IN_SCRIPTING, "%>")) {
        beginState(INITIAL);
        return T_CLOSE_TAG; /* implicit ';' at php-end tag */
    }

    // ---- strings ---------------------------------------------------------
    // TODO: decode escape sequences into the token value (see the
    // QUOTED_STRING / SQUOTED_STRING actions of the Zend scanner).
    if (stateTestRe(ST_IN_SCRIPTING, "QUOTED_STRING")) { // ([\"]([^$\"\\]|(\"\\\".))*[\"])
        return T_CONSTANT_ENCAPSED_STRING;
    }
    if (stateTestRe(ST_IN_SCRIPTING, "SQUOTED_STRING")) { // ([']([^'\\]|(\"\\\".))*['])
        return T_CONSTANT_ENCAPSED_STRING;
    }
    if (stateTest(ST_IN_SCRIPTING, "\"")) {
        beginState(ST_DOUBLE_QUOTES);
        return '"';
    }
    if (stateTest(ST_IN_SCRIPTING, "`")) {
        beginState(ST_BACKQUOTE);
        return '`';
    }
    if (stateTest(ST_IN_SCRIPTING, "'")) {
        beginState(ST_SINGLE_QUOTE);
        return '\'';
    }

    // ---- single-character punctuation, last of the scripting rules --------
    if (stateTestRe(ST_IN_SCRIPTING, "TOKENS")) {
        return input[start];
    }

    // ---- inside quoted contexts ------------------------------------------
    if (stateTestRe(ST_HEREDOC, "HEREDOC_END")) { // ^{LABEL}(";")?{NEWLINE}
        // TODO: only end the heredoc when the label equals the opening one;
        // otherwise the Zend scanner returns the text as T_STRING.
        beginState(ST_IN_SCRIPTING);
        return T_END_HEREDOC;
    }
    if (stateTestRe(ST_DOUBLE_QUOTES, "ESCAPED_AND_WHITESPACE") ||
        stateTestRe(ST_BACKQUOTE, "ESCAPED_AND_WHITESPACE") ||
        stateTestRe(ST_HEREDOC, "ESCAPED_AND_WHITESPACE")) {
        return T_ENCAPSED_AND_WHITESPACE;
    }
    if (stateTestRe(ST_SINGLE_QUOTE, "SQUOTE_CONTENTS")) { // ([^'\\]|\\[^'\\])+
        return T_ENCAPSED_AND_WHITESPACE;
    }
    if (stateTest(ST_DOUBLE_QUOTES, "`")) {
        return T_ENCAPSED_AND_WHITESPACE;
    }
    if (stateTest(ST_BACKQUOTE, "\"")) {
        return T_ENCAPSED_AND_WHITESPACE;
    }
    if (stateTestRe(ST_DOUBLE_QUOTES, "NOT_A_VARIABLE") || // "$"[^a-zA-Z_\x7f-\xff{]
        stateTestRe(ST_BACKQUOTE, "NOT_A_VARIABLE") ||
        stateTestRe(ST_HEREDOC, "NOT_A_VARIABLE")) {
        // yyless(1): only the '$' is the token; hand the lookahead back.
        if (yypos - start == 2) {
            yypos = start + 1;
        }
        return T_CHARACTER;
    }
    if (stateTest(ST_DOUBLE_QUOTES, "\\{") ||
        stateTest(ST_BACKQUOTE, "\\{") ||
        stateTest(ST_HEREDOC, "\\{")) {
        return T_STRING;
    }
    // "{$" before ENCAPSED_TOKENS, which would consume the '{' on its own.
    if (stateTest(ST_DOUBLE_QUOTES, "{$") ||
        stateTest(ST_BACKQUOTE, "{$") ||
        stateTest(ST_HEREDOC, "{$")) {
        pushState(ST_IN_SCRIPTING);
        yypos = start + 1; // yyless(1): the '$' starts the next token
        return T_CURLY_OPEN;
    }
    if (stateTestRe(ST_DOUBLE_QUOTES, "ENCAPSED_TOKENS") ||
        stateTestRe(ST_BACKQUOTE, "ENCAPSED_TOKENS") ||
        stateTestRe(ST_HEREDOC, "ENCAPSED_TOKENS")) {
        return input[start];
    }
    // NOTE(review): the last two rules look like they belong to
    // ST_DOUBLE_QUOTES and ST_BACKQUOTE respectively; states kept as found.
    if (stateTest(ST_SINGLE_QUOTE, "\\'")) { return T_CHARACTER; }
    if (stateTest(ST_SINGLE_QUOTE, "\\\\")) { return T_CHARACTER; }
    if (stateTest(ST_SINGLE_QUOTE, "\\\"")) { return T_CHARACTER; }
    if (stateTest(ST_SINGLE_QUOTE, "\\`")) { return T_CHARACTER; }
    if (stateTestRe(ST_DOUBLE_QUOTES, "ESCAPED_CHAR") || // "\\"[0-7]{1,3}
        stateTestRe(ST_BACKQUOTE, "ESCAPED_CHAR") ||
        stateTestRe(ST_HEREDOC, "ESCAPED_CHAR")) {
        return T_CHARACTER;
    }
    if (stateTestRe(ST_DOUBLE_QUOTES, "ESCAPED_HCHAR") || // "\\x"[0-9A-Fa-f]{1,2}
        stateTestRe(ST_BACKQUOTE, "ESCAPED_HCHAR") ||
        stateTestRe(ST_HEREDOC, "ESCAPED_HCHAR")) {
        return T_CHARACTER;
    }
    if (stateTestRe(ST_DOUBLE_QUOTES, "ESCAPED_ANY_CHAR") || // "\\"{ANY_CHAR}
        stateTestRe(ST_BACKQUOTE, "ESCAPED_ANY_CHAR") ||
        stateTestRe(ST_HEREDOC, "ESCAPED_ANY_CHAR")) {
        // TODO: the Zend scanner returns T_BAD_CHARACTER for escapes other
        // than \n \t \r \\ \$.
        return T_CHARACTER;
    }
    if (stateTest(ST_HEREDOC, "\"") || // ["'`]+
        stateTest(ST_HEREDOC, "'") ||
        stateTest(ST_HEREDOC, "`")) {
        return T_ENCAPSED_AND_WHITESPACE;
    }

    // ---- closing quotes --------------------------------------------------
    if (stateTest(ST_DOUBLE_QUOTES, "\"")) {
        beginState(ST_IN_SCRIPTING);
        return '\"';
    }
    if (stateTest(ST_BACKQUOTE, "`")) {
        beginState(ST_IN_SCRIPTING);
        return '`';
    }
    if (stateTest(ST_SINGLE_QUOTE, "'")) {
        beginState(ST_IN_SCRIPTING);
        return '\'';
    }

    // ---- nothing matched -------------------------------------------------
    // Report the character and step over it so the caller keeps making
    // progress instead of seeing the same position again.
    writefln("Unexpected character in input: '" , input[yypos] ,
        "' (ASCII=" , cast(int) input[yypos],
        ") state=" , cast(int) currentState);
    yypos++;
    return T_BAD_CHARACTER;
}
boolean stateTestRe(states s, String rege)
{
if (currentState != s) {
return false;
}
int y;
switch (rege) {
case "LABEL":
//LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
char c0 = input[yypos];
if ( ((c0 >= 'a') && (c0 <='z')) ||
((c0 >= 'A') && (c0 <='Z')) ||
(c0 == '_') ||
((c0 >= \0x7f ) && (c0 <= \0xff))
) {
//c0_match = true;
} else {
return false;
}
y = yypos + 1;
// next chars..
while (y < input.length) {
char c0 = input[y];
if ( ((c0 >= 'a') && (c0 <='z')) ||
((c0 >= 'A') && (c0 <='Z')) ||
((c0 >= '0') && (c0 <='9')) ||
(c0 == '_') ||
((c0 >= \0x7f ) && (c0 <= \0xff))
) {
y++;
continue;
}
y--;
break;
}
yypos = y+1;
return true;
case "$LABEL":
char c0 = input[yypos];
if (c0 != '$') {
return false;
}
int yystart = yypos;
yypos++;
if (!stateTestRe(s, "LABEL")) {
yypos = yystart;
return false;
}
return true;
case "LNUM":
//[0-9]+
int y = yypos;
while (y < input.length) {
char c0 = input[y];
if ((c0 >= '0') && (c0 <='9')) {
y++;
continue;
}
y--;
break;
}
if (y <= yypos) {
return false;
}
yypos = y+1;
return true;
case "DNUM":
//DNUM ([0-9]*[\.][0-9]+)
//|([0-9]+[\.][0-9]*)
// eg. 0000.00000, .00000 , 000.
y = yypos;
int got_dot = false;
while (y < input.length) {
char c0 = input[y];
if ((c0 >= '0') && (c0 <='9')) {
y++;
continue;
}
if (!got_dot && c0=='.') {
got_dot = true;
y++;
continue;
}
y--;
break;
}
if (y <= yypos) {
return false;
}
if (((y - yypos) == 1) && (input[yypos] == '.')) {
return false;
}
yypos = y+1;
return true;
case "EXPONENT_DNUM": //(({LNUM}|{DNUM})[eE][+-]?{LNUM})
int ystart = yypos;
if (!this.stateTestRe(s, "LNUM")) {
yypos = ystart;
if (!this.stateTestRe(s, "DNUM")) {
yypos = ystart;
return false;
}
}
y = yypos;
char c0 = input[y];
if ((c0 != 'e') && (c0 != 'E')) {
yypos = ystart;
return false;
}
y++;
char c0 = input[y];
if ((c0 == '+') || (c0 == '-')) {
y++;
}
yypos = y;
if (!this.stateTestRe(s, "LNUM")) {
yypos = ystart;
return false;
}
// yypos should be ok!
return true;
case "HNUM" // "0x"[0-9a-fA-F]+
int y = yypos;
while (y < input.length) {
char c0 = input[y];
if ((y == yypos) && (c0 != '0')) {
return false;
}
if (y == yypos) {
y++;
continue;
}
if ((y == (yypos + 1)) && (c0 != 'x')) {
return false;
}
if (y == (yypos +1)) {
y++;
continue;
}
if ( ((c0 >= 'a') && (c0 <='f')) ||
((c0 >= 'A') && (c0 <='F')) ||
((c0 >= '0') && (c0 <='9')) ) {
yy++;
continue;
}
y--;
break;
}
if (y <= (yypos + 2)) {
return false;
}
yypos = y+1;
return true;
case "WHITESPACE":
// [ \n\r\t]+
int y = yypos;
while (y < input.length) {
char c0 = input[y];
if ((c0 == ' ') ||
(c0 == '\n') ||
(c0 == '\r') ||
(c0 == '\t')
) {
if (c0 == '\n') {
this.lines ++;
}
y++;
continue;
}
y--;
break;
}
if (y <= yypos) {
return false;
}
yypos = y+1;
return true;
case "TABS_AND_SPACES": // [ \t]* - really just shifts the yypos ..
int y = yypos;
while (y < input.length) {
char c0 = input[y];
if ((c0 == ' ') ||
(c0 == '\t')
) {
y++;
continue;
}
y--;
break;
}
yypos = y+1;
return true;
case "TOKENS": //[;:,.\[\]()|^&+-/*=%!~$<>?@]
if (isEof()) {
return false;
}
int y = yypos;
char c0 = input[y];
if ((c0 == ';') ||
(c0 == ':') ||
(c0 == ',') ||
(c0 == '.') ||
(c0 == '[') ||
(c0 == ']') ||
(c0 == '(') ||
(c0 == ')') ||
(c0 == '|') ||
(c0 == '^') ||
(c0 == '&') ||
(c0 == '+') ||
(c0 == '-') ||
(c0 == '/') ||
(c0 == '*') ||
(c0 == '=') ||
(c0 == '%') ||
(c0 == '!') ||
(c0 == '~') ||
(c0 == '$') ||
(c0 == '<') ||
(c0 == '>') ||
(c0 == '?') ||
(c0 == '@')) {
yypos++;
return true;
}
return false;
case "ENCAPSED_TOKENS": //[\[\]{}$]
if (isEof()) {
return false;
}
int y = yypos;
char c0 = input[y];
if ((c0 == '[') ||
(c0 == ']') ||
(c0 == '{') ||
(c0 == '}') ||
(c0 == '$') ) {
yypos++;
return true;
}
return false;
case "ESCAPED_AND_WHITESPACE": //[\n\t\r #'.:;,()|^&+-/*=%!~<>?@]+
int y = yypos;
while (y < input.length) {
char c0 = input[y];
if ((c0 == '\n') ||
(c0 == '\t') ||
(c0 == '\r') ||
(c0 == ' ') ||
(c0 == '#') ||
(c0 == '\'') ||
(c0 == '.') ||
(c0 == ':') ||
(c0 == ';') ||
(c0 == ',') ||
// (c0 == '[') ||
// (c0 == ']') ||
(c0 == '(') ||
(c0 == ')') ||
(c0 == '|') ||
(c0 == '^') ||
(c0 == '&') ||
(c0 == '+') ||
(c0 == '-') ||
(c0 == '/') ||
(c0 == '*') ||
(c0 == '=') ||
(c0 == '%') ||
(c0 == '!') ||
(c0 == '~') ||
//(c0 == '$') ||
(c0 == '<') ||
(c0 == '>') ||
(c0 == '?') ||
(c0 == '@')) {
if (c0 == '\n') {
this.lines ++;
}
y++;
continue;
}
y--;
break;
}
if (y <= yypos) {
return false;
}
yypos = y+1;
return true;
case "ANY_CHAR":
if (y < input.length) {
yypos ++;
return true;
}
return false;
case "NEWLINE": // ("\r"|"\n"|"\r\n")
if (y >= input.length) {
return false;
}
char c0 = input[y];
if ((y+1) < input.length) {
char c1 = input[y+1];
if ((c0 == '\r') && (c1 == '\n')) {
yypos +=2;
return true;
}
}
if (((c0 == '\n') ||
(c0 == '\r'))) {
if (c0 == '\n') {
this.lines ++;
}
yypos++;
return true;
}
return false;
case "TABS_AND_SPACES(int|integer)TABS_AND_SPACES":
int yystart = yypos;
stateTestRe(s, "TABS_AND_SPACES");
int yyw = yypos;
if (!stateTest(s,"int") && !stateTest(s,"integer")) {
yypos = yystart;
return false;
}
stateTestRe(s, "TABS_AND_SPACES");
return true;
case "TABS_AND_SPACESstringTABS_AND_SPACES":
int yystart = yypos;
stateTestRe(s, "TABS_AND_SPACES");
int yyw = yypos;
if (!stateTest(s,"string")) {
yypos = yystart;
return false;
}
stateTestRe(s, "TABS_AND_SPACES");
return true;
case "TABS_AND_SPACESarrayTABS_AND_SPACES":
int yystart = yypos;
stateTestRe(s, "TABS_AND_SPACES");
int yyw = yypos;
if (!stateTest(s,"array")) {
yypos = yystart;
return false;
}
stateTestRe(s, "TABS_AND_SPACES");
return true;
case "TABS_AND_SPACESobjectTABS_AND_SPACES":
int yystart = yypos;
stateTestRe(s, "TABS_AND_SPACES");
int yyw = yypos;
if (!stateTest(s,"object")) {
yypos = yystart;
return false;
}
stateTestRe(s, "TABS_AND_SPACES");
return true;
case "TABS_AND_SPACESbool|booleanTABS_AND_SPACES":
int yystart = yypos;
stateTestRe(s, "TABS_AND_SPACES");
int yyw = yypos;
if (!stateTest(s,"bool") && !stateTest(s,"boolean")) {
yypos = yystart;
return false;
}
stateTestRe(s, "TABS_AND_SPACES");
return true;
case "TABS_AND_SPACESunsetTABS_AND_SPACES":
int yystart = yypos;
stateTestRe(s, "TABS_AND_SPACES");
int yyw = yypos;
if (!stateTest(s,"unset")) {
yypos = yystart;
return false;
}
stateTestRe(s, "TABS_AND_SPACES");
return true;
case "STANDARD_START":
//""|
// "