001 package plugins.JavaRolePluginModule;
002
003 import java.util.*;
004
/** Java source code parser.
 * @author Mikael Södergren (modified)
 * @version 0.05
 *
 * Fixed:
 * 1. The reduction of matching curly braces and parentheses works!
 * 2. The generation of a tree
 * 3. Added support for the static initializer block static { ... }
 * 4. Smarter detection of class, interface, import and package
 */
015 public class Parser {
016 private Entity currentEntity;
017 private Entity previousEntity;
018 private String file;
019 private int pos;
020
021 public Parser(String file) {
022 this.file=file;
023 pos=0;
024 }
025
026 /** Att g?öra: M?åste hantera ov?äntade filslut */
027
028 public Entity getNextEntity() {
029 previousEntity=currentEntity;
030 currentEntity=new Entity(file);
031 currentEntity.startPos=pos;
032 eatWhitespace();
033 currentEntity.contentStartPos=pos;
034
035 /* Check if this is the first call of this method */
036 /*if(previousEntity==null) {
037 eatWhitespace();
038 if(currentEntity.startPos!=pos) {
039 currentEntity.type=Entity.EXTRA;
040 currentEntity.endPos=currentEntity.startPos;
041 currentEntity.whitespaceEndPos=pos;
042 return currentEntity;
043 }
044 }*/
045 if(pos<file.length()) {
046 char c=file.charAt(pos);
047 switch(c) {
048 case 'i':
049 case 'p':
050 if(isImport() || isPackage()) {
051 moveBeyondSemicolon();
052 currentEntity.endPos=pos;
053 currentEntity.type=Entity.EXTRA;
054 return currentEntity;
055 }
056 break;
057 case 's':
058 if(file.regionMatches(pos+1,"tatic",0,5)) {
059 pos+=6;
060 eatWhitespace();
061 if(pos<file.length() && file.charAt(pos)=='{') {
062 findClosingCurl();
063 currentEntity.endPos=pos;
064 currentEntity.type=Entity.EXTRA;
065 return currentEntity;
066 }
067 }
068 break;
069 case '{':
070 if(previousEntity.type==Entity.CLASSHEAD) {
071 pos++;
072 currentEntity.endPos=pos;
073 currentEntity.type=Entity.CLASSBODY;
074 return currentEntity;
075 }else if(previousEntity.type==Entity.METHODHEAD) {
076 findClosingCurl();
077 currentEntity.endPos=pos;
078 currentEntity.type=Entity.METHODBODY;
079 return currentEntity;
080 }else {
081 currentEntity.type=Entity.SYNTAXERROR;
082 return currentEntity;
083 }
084
085 case '/':
086 eatComment();
087 return currentEntity;
088
089 case ';':
090 if(previousEntity.type==Entity.CLASSHEAD ||
091 previousEntity.type==Entity.METHODHEAD) {
092 pos++;
093 currentEntity.type=Entity.SEMICOLON;
094 currentEntity.endPos=pos;
095 return currentEntity;
096 } else {
097 pos++;
098 currentEntity.type=Entity.EXTRA;
099 currentEntity.endPos=pos;
100 }
101 break;
102 case '}':
103 pos++;
104 currentEntity.type=Entity.ENDOFCLASSBODY;
105 currentEntity.endPos=pos;
106 return currentEntity;
107 }
108 }
109
110
111
112 while(pos<file.length()) {
113 char c=file.charAt(pos);
114
115 switch(c) {
116 case 'c':
117 if(isClass()) {
118 currentEntity.type=Entity.CLASSHEAD;
119 pos+=6;
120 } else {
121 pos++;
122 }
123 break;
124
125 case 'i':
126 if(isInterface()) {
127 currentEntity.type=Entity.CLASSHEAD;
128 pos+=10;
129 } else {
130 pos++;
131 }
132 break;
133
134 case '(':
135 /* todo unders?ök att currententity inte ?är CLASSHEAD */
136 if(findClosingParenthesis()) {
137 if(currentEntity.type!=Entity.VARIABLE) {
138 currentEntity.type=Entity.METHODHEAD;
139 }
140 }else{
141 currentEntity.type=Entity.SYNTAXERROR;
142 return currentEntity;
143 }
144 break;
145
146
147 case '{':
148 if(currentEntity.type==Entity.CLASSHEAD) {
149 backtrackWhileWhitespace();
150 currentEntity.endPos=pos;
151 return currentEntity;
152 }else if(currentEntity.type==Entity.METHODHEAD) {
153 backtrackWhileWhitespace();
154 currentEntity.endPos=pos;
155 return currentEntity;
156 }else if(currentEntity.type==Entity.VARIABLE) {
157 findClosingCurl();
158 } else {
159 currentEntity.type=Entity.SYNTAXERROR;
160 return currentEntity;
161 }
162 break;
163
164 case '}':
165 currentEntity.type=Entity.SYNTAXERROR;
166 return currentEntity;
167
168 case ';':
169 if(currentEntity.type==Entity.CLASSHEAD ||
170 currentEntity.type==Entity.METHODHEAD) {
171 backtrackWhileWhitespace();
172 currentEntity.endPos=pos;
173 } else {
174 currentEntity.type=Entity.VARIABLE;
175 pos++;
176 currentEntity.endPos=pos;
177 }
178
179 return currentEntity;
180
181 case '=':
182 currentEntity.type=Entity.VARIABLE;
183 pos++;
184 break;
185
186 case '\"':
187 findEndOfString();
188 break;
189
190 case '\'':
191 findEndOfChar();
192 break;
193
194 default:
195 pos++;
196 break;
197 }
198 }
199
200
201 if(currentEntity.contentStartPos==pos) {
202 currentEntity.type=Entity.ENDOFFILE;
203 currentEntity.endPos=pos;
204 }else{
205 currentEntity.type=Entity.SYNTAXERROR;
206 }
207 return currentEntity;
208 }
209
210 private boolean isClass() {
211 if(file.regionMatches(pos+1,"lass",0,4)) {
212 if(currentEntity.contentStartPos==pos ||
213 Character.isWhitespace(file.charAt(pos-1))) {
214 if(pos+5<file.length() &&
215 Character.isWhitespace(file.charAt(pos+5))) {
216 return true;
217 }
218 }
219
220 }
221 return false;
222 }
223
224 private boolean isInterface() {
225 if(file.regionMatches(pos+1,"nterface",0,8)) {
226 if(currentEntity.contentStartPos==pos ||
227 Character.isWhitespace(file.charAt(pos-1))) {
228 if(pos+9<file.length() &&
229 Character.isWhitespace(file.charAt(pos+9))) {
230 return true;
231 }
232 }
233 }
234 return false;
235 }
236
237 private boolean isImport() {
238 if(file.regionMatches(pos+1,"mport",0,5) &&
239 pos+6<file.length() &&
240 Character.isWhitespace(file.charAt(pos+6))) {
241 return true;
242 }else{
243 return false;
244 }
245 }
246
247
248 private boolean isPackage() {
249 if(file.regionMatches(pos+1,"ackage",0,6) &&
250 pos+7<file.length() &&
251 Character.isWhitespace(file.charAt(pos+7))) {
252 return true;
253 }else{
254 return false;
255 }
256 }
257
258
259
260 private boolean findClosingCurl() {
261 char[] chars={ '{' , '}' };
262 return findClosingPair(chars);
263 }
264
265 /** Returnerar true om slutparentes hittades */
266 private boolean findClosingParenthesis() {
267 char[] chars={ '(' , ')' };
268 return findClosingPair(chars);
269 }
270
271 private boolean findClosingPair(char[] chars) {
272 int diff=1; //inneh?åller en {
273 char first;
274
275 pos++;
276
277 while(diff!=0) {
278 if(pos>=file.length()) {
279 return false;
280 }
281 first = findFirst(chars);
282 if(first == chars[0]) diff++;
283 else if(first == chars[1]) diff--;
284 else pos++;
285
286 }
287 return true;
288 }
289
290
291
292 private void eatWhitespace() {
293 while(pos<file.length()) {
294 if(Character.isWhitespace(file.charAt(pos))) {
295 pos++;
296 }else{
297 return;
298 }
299 }
300 }
301
302 private void backtrackWhileWhitespace() {
303 pos--;
304 while(pos>=0) {
305 if(Character.isWhitespace(file.charAt(pos))) {
306 pos--;
307 }else{
308 pos++;
309 return;
310 }
311 }
312 }
313
314
315 private void moveBeyondSemicolon() {
316 char[] chars={';'};
317 pos++;
318 findFirst(chars);
319 }
320
321
322 /** Assumes start-of-comment is at current position */
323 private void findEndOfComment() {
324 int nextPos=file.indexOf("*/",pos+2);
325 if(nextPos>=0) {
326 pos=nextPos+2;
327 }
328 /* @todo fel om doc-slut inte hittades */
329 }
330
331 /** Assumes an " at current position and searches from next position */
332 private void findEndOfString() {
333 pos++;
334 while(pos<file.length()) {
335 if(file.charAt(pos) == '\\') {
336 pos+=2;
337 continue;
338 }else if(file.charAt(pos) == '\"') {
339 pos++;
340 return;
341 }else {
342 pos++;
343 }
344 }
345 /* @todo Skicka tillbaka syntax error */
346 System.out.println("Syntax error, string not closed");
347 }
348
349
350 /** S?öker efter den f?örsta f?örekomsten av n?ågon av tecknen i chars.
351 * Ignorerar str?ängar, char och kommentarer.
352 * Bra inneh?åll i chars = \n \r \f { } ;
353 * Returnerar funnet tecken och s?ätter positionen till efter tecknet.
354 */
355 private char findFirst(char[] chars) {
356 char c;
357 while(pos<file.length()) {
358 c=file.charAt(pos);
359 switch(c) {
360 case '\"':
361 findEndOfString();
362 break;
363 case '\'':
364 findEndOfChar();
365 break;
366 case '/':
367 eatComment();
368 break;
369 default:
370 for(int i=0;i<chars.length;i++) {
371 if(c==chars[i]) {
372 pos++;
373 return c;
374 }
375 }
376 pos++;
377 break;
378 }
379
380 }
381 /* @todo ska returnera syntax error */
382 System.out.println("Error finding characters");
383 return '\0';
384 }
385
386 /** Assumes that the char at the current position is /
387 * Observe that this method makes changes to currentEntity!
388 */
389 private void eatComment() {
390 if(pos+1<file.length()) {
391 if(file.charAt(pos+1) == '/') {
392 findLineEnd();
393 currentEntity.endPos=pos;
394 currentEntity.type=Entity.EXTRA;
395 return;
396 } else if(file.charAt(pos+1) == '*') {
397 if(pos+2<file.length() && file.charAt(pos+2) == '*') {
398 findEndOfComment();
399 currentEntity.endPos=pos;
400 currentEntity.type=Entity.DOCCOMMENT;
401 return;
402 } else {
403 //Ingen doc-kommentar
404 findEndOfComment();
405 currentEntity.endPos=pos;
406 currentEntity.type=Entity.EXTRA;
407 return;
408 }
409 }
410 }
411 currentEntity.type=Entity.SYNTAXERROR;
412 }
413
414
415
416 /** Assumes a char at current position */
417 private void findEndOfChar() {
418 pos++;
419 while(pos<file.length()) {
420 if(file.charAt(pos) == '\\') {
421 pos+=2;
422 continue;
423 }else if(file.charAt(pos) == '\'') {
424 pos++;
425 return;
426 }else {
427 pos++;
428 }
429 }
430 System.out.println("Syntax error, char not closed");
431 }
432
433 private void findLineEnd() {
434 pos+=2;
435
436 if(pos>=file.length()) { return; }
437
438 int min=file.indexOf('\n',pos);
439 if(min<0) min=Integer.MAX_VALUE;
440
441 int p=file.indexOf('\r',pos);
442 if(p<min && p>=0) min=p;
443
444 p=file.indexOf('\f',pos);
445 if(p<min && p>=0) min=p;
446
447 if(min!=Integer.MAX_VALUE) {
448 pos=min+1;
449 }else{
450 /* @todo returnera syntax error */
451 System.out.println("Serious file error");
452 System.exit(0);
453 }
454 }
455 }
456
457
458