Here's a mock-up of a lexical scanner routine, the kind that would sit at the bottom layer of a compiler/parser.
[pbcode]
; Token IDs returned by GetNextToken() and stored in Parser.CurrentToken.
; Every constant takes its value from ac(1), after "ac = 1" on the first line.
; NOTE(review): this looks like an auto-incrementing counter handing out
; sequential IDs starting at 1 -- confirm against the PlayBASIC manual.
; The code below only ever compares these IDs for equality.
ac = 1
constant Token_UNKNOWN =ac(1) ; a character the scanner does not recognise
constant Token_INTEGER =ac(1) ; a run of the digits 0 to 9
constant Token_TEXT =ac(1) ; a run of the letters a to z / A to Z
constant Token_OPERATOR_ADD =ac(1) ; the "+" character
constant Token_OPERATOR_SUB =ac(1) ; the "-" character
constant Token_OPERATOR_MULT =ac(1) ; the "*" character
constant Token_OPERATOR_DIV =ac(1) ; the "/" character
constant Token_ENDOFFILE =ac(1) ; the scan position has reached the end of the source
; State of the scanner: where it is in the source and what it found last.
; GetNextToken() reads and updates these fields on every call.
Type tParser
SourcePointer ; Current position within the source, as a 0 based character offset (the scanner reads character 1+SourcePointer)
CurrentToken ; Numeric ID (a Token_ constant) of the last token, so callers can compare values and not strings
CurrentTokenText$ ; The source text of that token
EndTYPE
; Create the one shared scanner state record that GetNextToken() works on
Dim Parser as tParser
Parser = new tParser

; The program text that is going to be scanned
global SourceCode$ = " stuff 10 + 20 - 30 "

print "SOURCE CODE:"+SourceCode$
print ""
print ""

; Request tokens one after another and list them. The loop gives up after
; 101 requests even if the end of the source was never reported.
For Attempt=0 to 100

    TokenID=GetNextToken()
    #print "TOKEN:"+STR$(TokenID)

    ; Nothing is listed for the end of file token, so leave straight away
    if TokenID = Token_ENDOFFILE
        exitfor
    endif

    ; Show the token's text, labelled with the kind of token it is
    select TokenID
        case Token_INTEGER
            print " Integer= "+ Parser.CurrentTokenText$
        case Token_TEXT
            print " Text= "+ Parser.CurrentTokenText$
        case Token_OPERATOR_ADD,Token_OPERATOR_SUB, Token_OPERATOR_DIV, Token_OPERATOR_MULT
            print "Operator= "+ Parser.CurrentTokenText$
    endselect

next

; Show the output and hold the window open until a key is pressed
sync
waitkey
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//------------------------------>> GetNextToken <<-----------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
; Scans the next token out of the global SourceCode$ string.
;
; Reads : Parser.SourcePointer (0 based offset where scanning resumes)
; Writes: Parser.SourcePointer     -> offset of the first character after the token
;         Parser.CurrentToken      -> the token ID that is also returned
;         Parser.CurrentTokenText$ -> the token's text ("" at end of file)
; Returns one of the Token_ constants:
;         Token_INTEGER      a run of digits
;         Token_TEXT         a run of letters
;         Token_OPERATOR_xxx a single + - / * character
;         Token_UNKNOWN      any other single character (it is skipped)
;         Token_ENDOFFILE    nothing but white space was left
; Leading spaces and tabs are skipped before the token is read.
Function GetNextToken()

    ; Resume from where the previous call stopped
    Pos       = Parser.SourcePointer
    SourceLen = Len(SourceCode$)

    ; ----------------------------------------------
    ; Skip WHITE SPACE (space = 32, tab = 9)
    ; ----------------------------------------------
    ; FirstHard stays at the end of the source when only white space is
    ; left, so the scan never reads a character beyond the last one.
    if Pos < SourceLen
        FirstHard = SourceLen
        For lp = Pos to SourceLen-1
            ThisCHR = mid(SourceCode$,1+lp)
            if ThisCHR<>32 and ThisCHR<>9
                FirstHard = lp
                exitfor
            endif
        next
        Pos = FirstHard
    endif

    if Pos >= SourceLen

        ; ----------------------------------------------
        ; Ran out of source code
        ; ----------------------------------------------
        CurrentToken = Token_ENDOFFILE
        Parser.SourcePointer = Pos
        Parser.CurrentTokenText$ = ""
        ; Message kept from the original mock up so the demo output is unchanged
        print "DONE"

    else

        ThisCHR  = mid(SourceCode$,1+Pos)
        RunToken = GetNextToken_RunClass(ThisCHR)

        if RunToken <> Token_UNKNOWN

            ; ----------------------------------------------
            ; The next 'hard' character starts a number or a word.
            ; Collect every following character of the same class.
            ; ----------------------------------------------
            ; RunEnd defaults to the end of the source, so a token that
            ; touches the last character is still stored and stepped over.
            RunText$ = ""
            RunEnd   = SourceLen
            For lp = Pos to SourceLen-1
                NextCHR = mid(SourceCode$,1+lp)
                if GetNextToken_RunClass(NextCHR) = RunToken
                    RunText$ += Chr$(NextCHR)
                else
                    RunEnd = lp
                    exitfor
                endif
            next

            CurrentToken = RunToken
            Parser.SourcePointer = RunEnd
            Parser.CurrentTokenText$ = RunText$

        else

            ; ----------------------------------------------
            ; Single character token: an operator, or a character
            ; the parser doesn't understand (Token_UNKNOWN).
            ; ----------------------------------------------
            CurrentToken = Token_UNKNOWN
            if ThisCHR=asc("+")
                CurrentToken = Token_OPERATOR_ADD
            endif
            if ThisCHR=asc("-")
                CurrentToken = Token_OPERATOR_SUB
            endif
            if ThisCHR=asc("/")
                CurrentToken = Token_OPERATOR_DIV
            endif
            if ThisCHR=asc("*")
                CurrentToken = Token_OPERATOR_MULT
            endif

            ; Either way, move to the next character in the source code and
            ; record the character itself so the caller can report it.
            Parser.SourcePointer = Pos+1
            Parser.CurrentTokenText$ = Chr$(ThisCHR)

        endif

    endif

    Parser.CurrentToken = CurrentToken

EndFunction CurrentToken


; Private helper of GetNextToken().
; Returns the ID of the multi character token that the character code
; ThisCHR belongs to: Token_INTEGER for "0" to "9", Token_TEXT for
; "a" to "z" and "A" to "Z", or Token_UNKNOWN for anything else.
Function GetNextToken_RunClass(ThisCHR)

    RunClass = Token_UNKNOWN

    if ThisCHR>=asc("0") and ThisCHR<=asc("9")
        RunClass = Token_INTEGER
    endif

    if (ThisCHR>=asc("a") and ThisCHR<=asc("z")) or (ThisCHR>=asc("A") and ThisCHR<=asc("Z"))
        RunClass = Token_TEXT
    endif

EndFunction RunClass
[/pbcode]
Related links:
* Simple Formula Evaluation / Compilers (http://www.underwaredesign.com/forums/index.php?topic=214.0)