Dynalib Utils
Tokenizer.h
Go to the documentation of this file.
1 //
2 // Created by Ken Kopelson on 8/11/17.
3 //
4 
5 #ifndef TOKENIZER_H
6 #define TOKENIZER_H
7 
8 
9 #include "../String.h"
10 #include "../IntWrapper.h"
11 
// Forward declarations: this header only refers to these types through
// pointers and references, so their full definitions are not required here.
class Token;
class TokenizerContext;
struct TokenizerState;
struct ErrorContext;
16 
17 class Tokenizer {
18  const String _text;
19  int _currPosition = -1;
20  int _linePosition = 0;
21  int _lineStart = 0;
22  int _indentPosition = 0;
23  Token* _currToken = nullptr;
24  char _currChar = '\0';
25  char _peekChar = '\0';
26  bool _errorFlag = false;
27  int _errorCount = 0;
28  bool _errorStop = false;
29  int _lineNumber = 0;
30  bool _contextCreated = false;
31 
32  TokenizerContext* _context = nullptr;
33  ErrorContext* _errorContext = nullptr;
34 
35 public:
36  Tokenizer(TokenizerContext* context, const String& text);
37  explicit Tokenizer(TokenizerContext* context);
38  explicit Tokenizer(const String& text);
39  Tokenizer();
40  virtual ~Tokenizer();
41 
42  void init();
43  void setContext(TokenizerContext* context);
45  void setText(const String& text);
46  void setText(String& text);
47  void setCaseSensitive(bool caseSensitive);
48  bool isCaseSensitive();
49  void setGetCharLits(bool getCharLits);
50  bool isGetCharLits();
51  void setGetStrings(bool getStrings);
52  bool isGetStrings();
53  void setAllowSingleQuotes(bool allowSingleQuotes);
54  bool isAllowSingleQuotes();
55  void setGetUnknown(bool getUnknown);
56  bool isGetUnknown();
57  void setGetCRLF(bool getCRLF);
58  bool isGetCRLF();
59  void setGetSpaces(bool getSpaces);
60  bool isGetSpaces();
61  void setAllowNumCommas(bool allowNumCommas);
62  bool isAllowNumCommas();
63 
64  void resetScan();
65  Integer* findKeyword(String* keyword);
66  Integer* findKeyword(String keyword);
67  bool addKeyword(String* keyword, int code);
68  Integer* findSingleOp(Char* singleOp);
69  Integer* findSingleOp(Char singleOp);
70  bool addSingleOp(Char* singleOp, int code);
71  Integer* findMultiOp(String* multiOp);
72  Integer* findMultiOp(String multiOp);
73  bool addMultiOp(String* multiOp, int code);
74  bool findIdentChar(Char* char1);
75  bool findIdentChar(Char char1);
76  void addIdentChars(const String& multiChars);
77  void clearIdentChars();
78  void setIdentChars(const String& multiChars);
79 
80  bool hasNextChar(int index = 1);
81  char peekNextChar(int index = 1);
82  char peekCurrChar();
83  bool isEOS();
84  bool fetchToken(Token& token);
85  int getCurrPosition();
86  void setIndentHere();
87  void setIndentPosition(int indentPos);
88  int getIndentPosition();
89  bool hasIndented(int indentPos);
90  bool hasOutdented(int indentPos);
91  void resetIndent();
94  bool isError();
96  void setErrorContext(ErrorContext* errorContext);
98  int getErrorCount();
101  void deleteState(TokenizerState* state);
102  void restartFromToken(Token& token);
103  void error(String msg);
104 
105 private:
106  bool _isBinaryClass(char ch);
107  bool _isDigitClass(char ch);
108  bool _isAlphaClass(char ch);
109  bool _isHexClass(char ch);
110  bool _isIdentClass(char ch);
111 
112  bool _findMultiOpChar(Char* char1);
113  bool _findMultiOpChar(Char char1);
114  void _advanceLine();
115  char _getNextChar();
116  void _appendCurrChar();
117  void _appendNextChar();
118  void _appendNextChars(int count);
119  void _restartToken();
120  bool _getNextToken();
121  char _skipSpaces();
122  void _skipCRLF();
123  bool _skipComment();
124  bool _checkForIdent();
125  bool _checkForOperator();
126  bool _checkForMultiOp();
127  bool _checkForCharLiteral();
128  bool _checkForQuote();
129  bool _completeQuote(char quoteChar);
130  void _processEscape();
131  char _getEscNumber();
132  void _calcDoubleValue();
133  bool _checkForNumber();
134  void _initNumberScan();
135  void _doNumber();
136  bool _doInfNan();
137  void _doInteger();
138  void _doFraction();
139  void _doExponent();
140  void _doHex();
141  void _doBinary();
142  void _doOctal();
143 };
144 
145 
146 #endif //TOKENIZER_H
void addIdentChars(const String &multiChars)
Definition: Tokenizer.cpp:203
TokenizerContext
Definition: TokenizerContext.h:14
String getTokenLineSegment()
Definition: Tokenizer.cpp:275
Tokenizer
Definition: Tokenizer.h:17
bool isGetCRLF()
Definition: Tokenizer.cpp:124
void deleteState(TokenizerState *state)
Definition: Tokenizer.cpp:334
bool findIdentChar(Char *char1)
Definition: Tokenizer.cpp:195
void setIdentChars(const String &multiChars)
Definition: Tokenizer.cpp:211
bool isGetStrings()
Definition: Tokenizer.cpp:100
void resetIndent()
Definition: Tokenizer.cpp:267
void resetScan()
Definition: Tokenizer.cpp:144
void setAllowNumCommas(bool allowNumCommas)
Definition: Tokenizer.cpp:136
void setText(const String &text)
Definition: Tokenizer.cpp:70
bool isAllowSingleQuotes()
Definition: Tokenizer.cpp:108
String
Definition: String.h:60
ErrorContext
Definition: ErrorContext.h:11
TokenizerContext * getContext()
Definition: Tokenizer.cpp:66
int getCurrPosition()
Definition: Tokenizer.cpp:243
int getIndentPosition()
Definition: Tokenizer.cpp:255
void setGetCharLits(bool getCharLits)
Definition: Tokenizer.cpp:88
Integer * findKeyword(String *keyword)
Definition: Tokenizer.cpp:159
void init()
Definition: Tokenizer.cpp:44
void error(String msg)
Definition: Tokenizer.cpp:1043
bool isGetUnknown()
Definition: Tokenizer.cpp:116
TokenizerState
Definition: TokenizerState.h:10
bool isError()
Definition: Tokenizer.cpp:279
bool hasOutdented(int indentPos)
Definition: Tokenizer.cpp:263
char peekNextChar(int index=1)
Definition: Tokenizer.cpp:219
void setCaseSensitive(bool caseSensitive)
Definition: Tokenizer.cpp:80
bool addKeyword(String *keyword, int code)
Definition: Tokenizer.cpp:167
void setErrorContext(ErrorContext *errorContext)
Definition: Tokenizer.cpp:287
Token * getCurrToken()
Definition: Tokenizer.cpp:271
Integer * findSingleOp(Char *singleOp)
Definition: Tokenizer.cpp:171
void setGetCRLF(bool getCRLF)
Definition: Tokenizer.cpp:120
bool addSingleOp(Char *singleOp, int code)
Definition: Tokenizer.cpp:179
void restartFromToken(Token &token)
Definition: Tokenizer.cpp:339
void setGetStrings(bool getStrings)
Definition: Tokenizer.cpp:96
bool fetchToken(Token &token)
Definition: Tokenizer.cpp:232
ErrorContext * getErrorContext()
Definition: Tokenizer.cpp:283
void setAllowSingleQuotes(bool allowSingleQuotes)
Definition: Tokenizer.cpp:104
bool isGetSpaces()
Definition: Tokenizer.cpp:132
bool isCaseSensitive()
Definition: Tokenizer.cpp:84
void setIndentPosition(int indentPos)
Definition: Tokenizer.cpp:251
int getErrorCount()
Definition: Tokenizer.cpp:298
String getErrorLineSegment()
Definition: Tokenizer.cpp:291
bool isGetCharLits()
Definition: Tokenizer.cpp:92
void clearIdentChars()
Definition: Tokenizer.cpp:207
Token
Definition: Token.h:35
Tokenizer()
Definition: Tokenizer.cpp:33
void setContext(TokenizerContext *context)
Definition: Tokenizer.cpp:56
bool isEOS()
Definition: Tokenizer.cpp:228
void setIndentHere()
Definition: Tokenizer.cpp:247
char peekCurrChar()
Definition: Tokenizer.cpp:224
bool hasNextChar(int index=1)
Definition: Tokenizer.cpp:215
bool isAllowNumCommas()
Definition: Tokenizer.cpp:140
Integer * findMultiOp(String *multiOp)
Definition: Tokenizer.cpp:183
bool hasIndented(int indentPos)
Definition: Tokenizer.cpp:259
bool addMultiOp(String *multiOp, int code)
Definition: Tokenizer.cpp:191
void setGetUnknown(bool getUnknown)
Definition: Tokenizer.cpp:112
virtual ~Tokenizer()
Definition: Tokenizer.cpp:37
Token * restoreState(TokenizerState *state)
Definition: Tokenizer.cpp:318
void setGetSpaces(bool getSpaces)
Definition: Tokenizer.cpp:128
TokenizerState * saveState()
Definition: Tokenizer.cpp:302