|
|
|
|
||
|
DEFINITION MODULE Scanner;

  (*******************************************************************

    Module  Scanner     (Version 1.0)

      Copyright (c) 1994-2006 by Dimitrios Gyalistras, Juerg Thoeny
      and ETH Zurich.

    Purpose   Maintains several symbol tables and sequentially scans
              the contents of a string buffer or a file.

    Remarks   - The symbols recognized by the scanner are (1) a set of
                predefined symbols (see CONST-declarations below), and
                (2) the symbols in the currently used symbol table.

              - The following definitions hold:

                  1. INTEGER    = [0-9]+
                  2. LONGINT    = [0-9]+ "D"
                  3. REAL       = [0-9]+ "." [0-9]+ (("E"|"e")[+-]?[0-9]+)?
                  4. LONGREAL   = [0-9]+ "." [0-9]+ (("D"|"d")[+-]?[0-9]+)?
                  5. IDENTIFIER = [a-zA-Z]+ [_a-zA-Z0-9]*
                  6. STRING     = ('.*')|(".*")

                The above definitions are given in regular expression
                notation, i.e.:

                  exp?            = 0 or 1 occurrence of exp,
                  exp*            = 0..N occurrences of exp,
                  exp+            = 1..N occurrences of exp,
                  "str"           = mandatory occurrence of string str,
                  (exp)           = contents of bracket are grouped,
                  exp1|exp2       = exp1 or exp2,
                  .               = any character except newline,
                  [minCh-maxCh]   = a single char from the set of
                                    characters from minCh to maxCh.

                NOTE: INTEGER and REAL carry NO SIGN ("+"/"-"). The sign
                must be scanned separately (sym=specialCharSym). Reading
                of signed numbers is supported by procedures TryXYZ
                (see below).

              - White spaces are all characters <=' ' and are skipped.

              - Symbols of the types 1-6 must be separated by white
                spaces or special characters, i.e. they can not follow
                immediately upon each other.

              - Special characters are all non-"white space" characters
                which are not attributed to a symbol of type 1-6
                (e.g. "+" or "-" before a REAL, "/", ":" etc.).

              - The scanner skips comments starting with "(*" and
                closing with "*)". Nested (recursive) comments are
                possible.

    Programming

      o Design
          Dimitrios Gyalistras      04/01/1994
          Juerg Thoeny              04/01/1994

      o Implementation
          Dimitrios Gyalistras      04/01/1994

    ETH Zurich
    Systems Ecology
    CHN E 35.1
    Universitaetstrasse 16
    8092 Zurich
    SWITZERLAND

    URLs:
        <mailto:RAMSES@env.ethz.ch>
        <http://www.sysecol.ethz.ch>
        <http://www.sysecol.ethz.ch/SimSoftware/RAMSES>

    Last revision of definition:  20/03/2003  AF

  *******************************************************************)


  CONST
    MaxChars = 1024;    (* number of characters held by Str1024 (see TYPE below) *)
    LgthResWord = 32;   (* maximum length of a reserved word in a symbol table *)
    MaxResWords = 128;  (* maximum number of reserved words per symbol table *)

  TYPE
    SymTable;           (* opaque handle to a symbol table *)
    Symbol = INTEGER;
    Str1024 = ARRAY [0..MaxChars-1] OF CHAR;
    Str256 = ARRAY [0..256-1] OF CHAR;

  CONST (* symbols common to all symbol tables *)
    nul = 0;              (* a bad file or number syntax or an overflow occurred *)
    eodSym = -1;          (* end of data or of file reached *)
    unknownIdent = -2;    (* token has the syntax of an ident, but is not in the
                             symbol table *)
    integerSym = -3;      (* an integer number *)
    longIntSym = -4;      (* a longinteger number *)
    realSym = -5;         (* a real number *)
    longRealSym = -6;     (* a longreal number *)
    strSym = -7;          (* a string *)
    specialCharSym = -8;  (* a non-"white space" character, given that none of
                             the above symbols can be returned *)

  (* The following type is used to indicate the reason for sym=nul. *)
  TYPE
    ScannerError = ( noError,
                     unexpectedEOD,    (* unexpected end of data/file reached *)
                     badSeparation,    (* two symbols of type 1-6 were not separated
                                          by a white space or a special character *)
                     badRealSyntax,    (* a (long)real number with bad syntax was
                                          encountered *)
                     badIdentSyntax,   (* bad syntax within an ident was encountered *)
                     numOverflow,      (* sym = (integerSym, longIntSym, realSym, or
                                          longRealSym) was detected, but the resulting
                                          number was too large *)
                     identOverflow,    (* sym = unknownIdent was detected, but the
                                          ident was too long (>LgthResWord) *)
                     strOverflow,      (* sym = strSym was detected, but the read
                                          string was longer than MaxChars. In this
                                          case, curStr will contain only the first
                                          MaxChars+1 of the string.
                                          NOTE(review): no variable curStr is declared
                                          in this module; presumably the variable str
                                          below is meant. Str1024 holds MaxChars
                                          characters, so "MaxChars+1" should be
                                          confirmed against the implementation. *)
                     strContainsEOL,   (* sym = strSym was detected, but an
                                          EOL-character was found within the string *)
                     commentNotClosed  (* a comment was opened and never closed *)
                   );

  (*--------------------------------------------------------------------------*)

  VAR
    str : Str1024;              (* The last scanned token *)
    sym : Symbol;               (* Symbolic meaning of the token *)
    scanErr : ScannerError;     (* Will be <>noError if sym=nul *)
    scanErrStr : Str256;        (* Will contain an error text, if scanErr<>noError *)
    int : INTEGER;
    lint : LONGINT;             (* Will contain str converted to integer (longInt),
                                   if sym = integerSym (longIntSym). *)
    real : REAL;
    lreal : LONGREAL;           (* Will contain str converted to real (longreal),
                                   if sym = realSym (longRealSym). *)
    whiteChFollows: BOOLEAN ;   (* Will be true, if the symbol is followed by a
                                   white character *)

  (*--------------------------------------------------------------------------*)

  PROCEDURE IsIdentifier( str: ARRAY OF CHAR ): BOOLEAN;
  (*
    Returns TRUE if str has an identifier syntax.
  *)

  PROCEDURE IsAllowedResWord( str: ARRAY OF CHAR ): BOOLEAN;
  (*
    Returns TRUE if str has an identifier syntax, and its length
    is less than LgthResWord.
  *)

  VAR
    notDeclaredSymTable: SymTable; (* read only *)

  PROCEDURE NewSymTable( VAR symTbl: SymTable;
                         VAR errTxt: ARRAY OF CHAR ):BOOLEAN ;
  (*
    Instantiates a new symbol table. If this is not possible,
    FALSE is returned and an error message is given in errTxt.
    If symTbl already exists, its contents are cleared.
  *)

  PROCEDURE RemoveSymTable( VAR symTbl: SymTable );
  (*
    Removes symTbl if it exists.
  *)

  PROCEDURE InsertSymbol( symTbl : SymTable;
                          resWrd : ARRAY OF CHAR;
                          sym : Symbol;
                          VAR errTxt: ARRAY OF CHAR):BOOLEAN ;
  (*
    Inserts the reserved word resWrd, associated with symbol sym, into
    symTbl. If this is not possible, FALSE is returned and an error
    message is given in errTxt.
  *)

  (*--------------------------------------------------------------------------*)

  PROCEDURE InitFileScan(fileName : ARRAY OF CHAR;
                         symTbl : SymTable;
                         VAR errTxt: ARRAY OF CHAR): BOOLEAN;
  (*
    Prepares scanning of file fileName using symbol table symTbl.
    From here on, calling GetSym (see below) will operate on this file.
    NOTE: The file is opened for scanning and is left open until
    StopScanner is called successfully.
  *)

  PROCEDURE InitBufferScan(VAR buff : ARRAY OF CHAR;
                           symTbl : SymTable;
                           VAR errTxt: ARRAY OF CHAR): BOOLEAN;
  (*
    Prepares scanning of buffer buff using symbol table symTbl.
    From here on, calling GetSym (see below) will operate on this
    buffer.
    NOTE: The buffer variable passed should persist after calling
    this procedure, as long as its contents are scanned. Its content
    may adhere to the end of line convention according to the currently
    running platform (as returned by routines RunsOnAUnixMachine,
    RunsOnAnIBMPC, and RunsOnAUnixMachine from DMSystem) or may mark
    end of lines with the Modula-2 symbol EOL from module DMFiles, in
    any combination.
    NOTE(review): RunsOnAUnixMachine is listed twice in the sentence
    above; the third routine is presumably a different platform test -
    confirm against DMSystem.
  *)

  PROCEDURE StopScanner;
  (*
    Closes a file possibly opened by a call to InitFileScan and
    disables GetSym (see below), i.e. sym=nul is always returned.
    NOTE: This procedure must be called before calling Init...Scan
    a second time.
  *)

  (*--------------------------------------------------------------------------*)

  PROCEDURE SetSymTable( symTbl : SymTable;
                         VAR errTxt: ARRAY OF CHAR): BOOLEAN;
  (*
    Sets the currently used symbol table for scanning to symTbl.
    A call to this procedure will have no effect, if the scanner
    is currently stopped.
  *)

  PROCEDURE CurSymTable(): SymTable;
  (*
    Returns the current symbol table.
  *)

  PROCEDURE SetCaseSensitivity( cs: BOOLEAN );
  (*
    Sets whether identifiers are scanned case sensitively.
    The default is TRUE.
    NOTE: In case that case sensitivity is switched off, make sure
    that reserved words in the symbol table contain ONLY capitalized
    characters, otherwise the scanner will not operate correctly!
  *)

  (*--------------------------------------------------------------------------*)

  PROCEDURE GetSym;
  (*
    Returns the next symbol read from the current input (file or
    buffer) into the variable sym and sets the variables str, int
    or real to the newest value(s).
  *)

  PROCEDURE ReGetSym;
  (*
    Forces that the next call to GetSym will return the last read
    symbol. The 2nd next call to GetSym will then continue normally.
  *)

  PROCEDURE GetResWord( symTbl: SymTable; sym: Symbol;
                        VAR str: ARRAY OF CHAR );
  (*
    Returns a descriptor of the predefined symbol sym (in this
    case symTbl is irrelevant) or the reserved word for the symbol
    sym from table symTbl.
  *)

  PROCEDURE GetLineCount( VAR lineNr: LONGINT ) ;
  (*
    Returns the line of the last scanned symbol.
  *)

  PROCEDURE GetCharCount( VAR charPos: INTEGER );
  (*
    Returns the position of the last read character on the current line.
  *)

  (*--------------------------------------------------------------------------*)

  (*
    The following procedures are particularly useful for reading
    data files. All procedures attempt to read a respective predefined
    symbol followed by a white character. If successful, the result is
    returned in the formal parameter. The respective variables (i.e.
    str, int, lint, real, lreal).
    NOTE(review): the preceding sentence is incomplete in the original
    text; presumably the global variables are updated as well (see the
    NOTE on their sign below) - confirm against the implementation.
    NOTE: Signed or unsigned numbers are recognized. The respective
    symbol is also returned in variable sym. The global variables
    int, lint, real, lreal (see above) will however NOT have the
    correct sign.
  *)

  PROCEDURE TryGetString ( VAR ss: ARRAY OF CHAR ):BOOLEAN;
  PROCEDURE TryGetInteger( VAR ii: INTEGER ):BOOLEAN;
  PROCEDURE TryGetLongInt( VAR li: LONGINT ):BOOLEAN;
  PROCEDURE TryGetReal ( VAR rr: REAL ):BOOLEAN;
  PROCEDURE TryGetLongReal( VAR lr: LONGREAL ):BOOLEAN;

  (*--------------------------------------------------------------------------*)

  PROCEDURE TryGetText( untilSym: Symbol;
                        VAR txt: ARRAY OF CHAR ):INTEGER;
  (*
    Reads into variable txt all text found from the current position
    until symbol untilSym.
    Returns -1 if EOD was reached before finding untilSym,
             0 if everything was all right, and
            +1 if reading is stopped because txt is full.
  *)

END Scanner.
|
||
|
|
|