|
// +----------------------------------------------------------------------+
//
// $Id: Tokenizer.php,v 1.9 2004/05/22 05:32:54 alan_k Exp $
//
// A global map array is used when the PHP tokenizer numbers its tokens
// differently from the numbers compiled into the parser.
/**
 * The tokenizer wrapper for the parser - implements a yylex-style interface.
 *
 * Main methods:
 *
 * - the constructor, which takes the source to parse, runs PHP's internal
 *   tokenizer over it and normalises every entry to an array(id, text, ...)
 *   (single-character tokens become array(ord(char), char)).
 * - advance(), which returns true while tokens are available and
 *     - sets {@link $token}
 *     - sets {@link $value}
 * - parseError(), which returns a string for parser error messages.
 *
 * State kept while scanning:
 * - {@link $line} - current line number
 * - {@link $pos}  - index of the current token in {@link $tokens}
 * - {@link $N}    - total number of tokens
 *
 * The optional tokenizer-id => parser-id translation table lives in
 * $GLOBALS['_' . __CLASS__]['map'] and is created by buildMap().
 *
 * @version $Id: Tokenizer.php,v 1.9 2004/05/22 05:32:54 alan_k Exp $
 */
class PintCompiler_Tokenizer
{
    /**
     * Debugging on/off - when true, advance() echoes every token it inspects.
     *
     * @var boolean
     * @access public
     */
    public $debug = false;

    /**
     * Tokens - array of all the tokens, each one an array(id, text, ...).
     *
     * @var array
     * @access public
     */
    public $tokens;

    /**
     * Total number of tokens.
     *
     * @var int
     * @access public
     */
    public $N = 0;

    /**
     * Current line.
     *
     * @var int
     * @access public
     */
    public $line;

    /**
     * Current token position (-1 before the first call to advance()).
     *
     * @var int
     * @access public
     */
    public $pos = -1;

    /**
     * The current token id (either ord() of a single character such as ';',
     * or a tokenizer/parser token number).
     *
     * @var int
     * @access public
     */
    public $token;

    /**
     * The text associated with the current token - e.g. for T_STRING it is the string.
     *
     * @var string
     * @access public
     */
    public $value;

    /**
     * Old-style (class-named) constructor kept for backward compatibility.
     *
     * Forwards all of its arguments to __construct().
     *
     * @return none
     * @access public
     */
    public function PintCompiler_Tokenizer()
    {
        $args = func_get_args();
        call_user_func_array(array($this, '__construct'), $args);
    }

    /**
     * Constructor
     *
     * Tokenizes the given string, normalises the token array and resets
     * pos, line and N. Does nothing at all when $data is empty/falsy.
     * Builds the global token map if it has not been built yet.
     *
     * @param string $data PHP code to tokenize
     *
     * @return none
     * @access public
     */
    public function __construct($data)
    {
        if (!$data) {
            return;
        }

        $this->tokens = token_get_all($data);
        $this->N      = count($this->tokens);

        // token_get_all() returns bare strings for single-character tokens;
        // turn them into array(ord(char), char) so every entry has the same shape.
        for ($i = 0; $i < $this->N; $i++) {
            if (!is_array($this->tokens[$i])) {
                $char = $this->tokens[$i];
                $this->tokens[$i] = array(ord($char), $char);
            }
        }

        $this->pos  = -1;
        $this->line = 1;

        if (!isset($GLOBALS['_' . __CLASS__]['map'])) {
            self::buildMap();
        }
    }

    /**
     * The main advance call required by the parser.
     *
     * Moves to the next significant token, skipping tags, inline HTML,
     * whitespace and comments, and keeps {@link $line} up to date by counting
     * the newlines in every token it passes (skipped or returned).
     * On success {@link $token} and {@link $value} describe the token; the id
     * is translated through the global map when an entry exists for it.
     *
     * @return boolean true = a token is available, false = no more tokens
     * @access public
     */
    public function advance()
    {
        // T_DOC_COMMENT is looked up dynamically because it may not be defined;
        // 10000 is the fallback id used in that case (presumably chosen so it
        // never matches a real token).
        static $T_DOC_COMMENT = false;
        if (!$T_DOC_COMMENT) {
            $T_DOC_COMMENT = defined('T_DOC_COMMENT') ? constant('T_DOC_COMMENT') : 10000;
        }

        $this->pos++;
        while ($this->pos < $this->N) {
            $current = $this->tokens[$this->pos];

            if ($this->debug) {
                $this->_traceToken($current);
            }

            switch ($current[0]) {
                // Tokens the parser never sees. A close tag may carry a trailing
                // newline ("?>\n"), so newlines are counted for all of them.
                case T_CLOSE_TAG:
                case T_OPEN_TAG_WITH_ECHO:
                case $T_DOC_COMMENT:
                case T_COMMENT:
                case T_OPEN_TAG:
                case T_INLINE_HTML:
                case T_WHITESPACE:
                    $this->line += substr_count($current[1], "\n");
                    $this->pos++;
                    // "continue 2" targets the while loop; a plain "continue"
                    // inside a switch only acts like "break".
                    continue 2;

                //--- returnable values: ';' and everything else ---
                default:
                    $this->line += substr_count($current[1], "\n");
                    $this->token = $current[0];
                    $this->value = $current[1];

                    // Translate the id when the tokenizer's numbering does not
                    // match the parser's. isset() is false both when the map is
                    // disabled (false) and when it has no entry for this id.
                    if (isset($GLOBALS['_' . __CLASS__]['map'][$this->token])) {
                        $this->token = $GLOBALS['_' . __CLASS__]['map'][$this->token];
                    }

                    $this->debug("ADVANCE: {$this->token} : {$this->value}");
                    return true;
            }
        }

        $this->debug("ADVANCE: EOF!");
        return false;
    }

    /**
     * Return something useful when a parse error occurs.
     *
     * Used to build error messages if the parser fails and needs the line number.
     *
     * @return string
     * @access public
     */
    public function parseError()
    {
        return "Error at line {$this->line}";
    }

    /**
     * Build the tokenizer-id => parser-id map if the token tables do not match.
     *
     * Loads PintCompiler/Parser.php, then walks the tokenizer ids starting at
     * 257 (or 258 when token_name(257) is 'UNKNOWN') and compares each
     * token name with the parser's yyName table. Every id whose name differs
     * is mapped to the parser's id for that name. The result is stored in
     * $GLOBALS['_' . __CLASS__]['map']; it is false when nothing needs mapping.
     * Calling it again once the map exists is a no-op.
     *
     * @return none
     * @access public
     * @static
     */
    public static function buildMap()
    {
        $globalKey = '_' . __CLASS__;
        if (isset($GLOBALS[$globalKey]['map'])) {
            return;
        }

        require_once 'PintCompiler/Parser.php';

        // Without a usable yyName table there is nothing to compare against.
        if (!isset($GLOBALS['_PINTCOMPILER_PARSER']['yyName'])
            || !is_array($GLOBALS['_PINTCOMPILER_PARSER']['yyName'])
        ) {
            $GLOBALS[$globalKey]['map'] = false;
            return;
        }

        $yyName = $GLOBALS['_PINTCOMPILER_PARSER']['yyName'];

        // name => parser id. "@" is kept on purpose: array_flip() warns about
        // (and skips) entries that are neither strings nor integers.
        $hash = @array_flip($yyName);

        $start  = (token_name(257) == 'UNKNOWN') ? 258 : 257;
        $offset = $start - 257;
        $limit  = count($yyName) + $offset;
        $map    = array();

        for ($i = $start; $i < $limit; $i++) {
            $lt = token_name($i);
            if ($lt == 'T_OLD_FUNCTION') {
                continue;
            }

            if ($lt == 'UNKNOWN') {
                // Two ids are given names even when the tokenizer does not
                // know them; any other unknown id ends the scan.
                $normalised = $i - $offset;
                if ($normalised == 350) {
                    $lt = 'T_INTERFACE';
                } elseif ($normalised == 352) {
                    $lt = 'T_IMPLEMENTS';
                } else {
                    break;
                }
            }

            // Tokenizer names that are spelled differently in the parser table.
            if ($lt == 'T_ML_COMMENT') {
                $lt = 'T_COMMENT';
            } elseif ($lt == 'T_DOUBLE_COLON') {
                $lt = 'T_PAAMAYIM_NEKUDOTAYIM';
            }

            $parserName = isset($yyName[$i]) ? $yyName[$i] : null;
            if ($parserName !== $lt && isset($hash[$lt])) {
                $map[$i] = $hash[$lt];
            }
        }

        // set the map to false if nothing in there.
        $GLOBALS[$globalKey]['map'] = (count($map) ? $map : false);
    }

    /**
     * Internal trace hook used by advance().
     *
     * Tracing is currently switched off: the method returns before printing.
     *
     * @param string $str message to print
     *
     * @return none
     * @access public
     */
    public function debug($str)
    {
        if (1) {
            return;
        }
        echo "$str\n";
    }

    /**
     * Echo one token for the {@link $debug} dump:
     * tokenizer name, parser name in parentheses, then the token text.
     *
     * The parser name is looked up through the global map first and falls
     * back to the unmapped id; it is empty when neither lookup succeeds.
     *
     * @param array $tok token as array(id, text, ...)
     *
     * @return none
     * @access private
     */
    private function _traceToken($tok)
    {
        $id    = $tok[0];
        $names = isset($GLOBALS['_PINTCOMPILER_PARSER']['yyName'])
            ? $GLOBALS['_PINTCOMPILER_PARSER']['yyName']
            : array();

        $label = '';
        if (isset($GLOBALS['_' . __CLASS__]['map'][$id])
            && isset($names[$GLOBALS['_' . __CLASS__]['map'][$id]])
        ) {
            $label = $names[$GLOBALS['_' . __CLASS__]['map'][$id]];
        } elseif (isset($names[$id])) {
            $label = $names[$id];
        }

        echo token_name($id) . '(' . $label . ')' . " : {$tok[1]}\n";
    }
}
// Build the tokenizer-to-parser token map as soon as this file is loaded.
// buildMap() returns immediately when $GLOBALS['_PintCompiler_Tokenizer']['map']
// is already set, so constructing tokenizers afterwards does not rebuild it.
PintCompiler_Tokenizer::buildMap();
?>