001 package plugins.JavaRolePluginModule; 002 003 import java.util.*; 004 005 /** Java source code parser 006 * @author Mikael S?ödergren, modified and 007 * @version 0.05 008 * 009 * Fixed: 010 * 1. The reduction of matching curls and parenthesis works! 011 * 2. The generation of a tree 012 * 3. Added support for the constructor static { ... } 013 * 4. Smarter detection of class, interface, import and package 014 */ 015 public class Parser { 016 private Entity currentEntity; 017 private Entity previousEntity; 018 private String file; 019 private int pos; 020 021 public Parser(String file) { 022 this.file=file; 023 pos=0; 024 } 025 026 /** Att g?öra: M?åste hantera ov?äntade filslut */ 027 028 public Entity getNextEntity() { 029 previousEntity=currentEntity; 030 currentEntity=new Entity(file); 031 currentEntity.startPos=pos; 032 eatWhitespace(); 033 currentEntity.contentStartPos=pos; 034 035 /* Check if this is the first call of this method */ 036 /*if(previousEntity==null) { 037 eatWhitespace(); 038 if(currentEntity.startPos!=pos) { 039 currentEntity.type=Entity.EXTRA; 040 currentEntity.endPos=currentEntity.startPos; 041 currentEntity.whitespaceEndPos=pos; 042 return currentEntity; 043 } 044 }*/ 045 if(pos<file.length()) { 046 char c=file.charAt(pos); 047 switch(c) { 048 case 'i': 049 case 'p': 050 if(isImport() || isPackage()) { 051 moveBeyondSemicolon(); 052 currentEntity.endPos=pos; 053 currentEntity.type=Entity.EXTRA; 054 return currentEntity; 055 } 056 break; 057 case 's': 058 if(file.regionMatches(pos+1,"tatic",0,5)) { 059 pos+=6; 060 eatWhitespace(); 061 if(pos<file.length() && file.charAt(pos)=='{') { 062 findClosingCurl(); 063 currentEntity.endPos=pos; 064 currentEntity.type=Entity.EXTRA; 065 return currentEntity; 066 } 067 } 068 break; 069 case '{': 070 if(previousEntity.type==Entity.CLASSHEAD) { 071 pos++; 072 currentEntity.endPos=pos; 073 currentEntity.type=Entity.CLASSBODY; 074 return currentEntity; 075 }else if(previousEntity.type==Entity.METHODHEAD) { 076 findClosingCurl(); 077 currentEntity.endPos=pos; 078 currentEntity.type=Entity.METHODBODY; 079 return currentEntity; 080 }else { 081 currentEntity.type=Entity.SYNTAXERROR; 082 return currentEntity; 083 } 084 085 case '/': 086 eatComment(); 087 return currentEntity; 088 089 case ';': 090 if(previousEntity.type==Entity.CLASSHEAD || 091 previousEntity.type==Entity.METHODHEAD) { 092 pos++; 093 currentEntity.type=Entity.SEMICOLON; 094 currentEntity.endPos=pos; 095 return currentEntity; 096 } else { 097 pos++; 098 currentEntity.type=Entity.EXTRA; 099 currentEntity.endPos=pos; 100 } 101 break; 102 case '}': 103 pos++; 104 currentEntity.type=Entity.ENDOFCLASSBODY; 105 currentEntity.endPos=pos; 106 return currentEntity; 107 } 108 } 109 110 111 112 while(pos<file.length()) { 113 char c=file.charAt(pos); 114 115 switch(c) { 116 case 'c': 117 if(isClass()) { 118 currentEntity.type=Entity.CLASSHEAD; 119 pos+=6; 120 } else { 121 pos++; 122 } 123 break; 124 125 case 'i': 126 if(isInterface()) { 127 currentEntity.type=Entity.CLASSHEAD; 128 pos+=10; 129 } else { 130 pos++; 131 } 132 break; 133 134 case '(': 135 /* todo unders?ök att currententity inte ?är CLASSHEAD */ 136 if(findClosingParenthesis()) { 137 if(currentEntity.type!=Entity.VARIABLE) { 138 currentEntity.type=Entity.METHODHEAD; 139 } 140 }else{ 141 currentEntity.type=Entity.SYNTAXERROR; 142 return currentEntity; 143 } 144 break; 145 146 147 case '{': 148 if(currentEntity.type==Entity.CLASSHEAD) { 149 backtrackWhileWhitespace(); 150 currentEntity.endPos=pos; 151 return currentEntity; 152 }else if(currentEntity.type==Entity.METHODHEAD) { 153 backtrackWhileWhitespace(); 154 currentEntity.endPos=pos; 155 return currentEntity; 156 }else if(currentEntity.type==Entity.VARIABLE) { 157 findClosingCurl(); 158 } else { 159 currentEntity.type=Entity.SYNTAXERROR; 160 return currentEntity; 161 } 162 break; 163 164 case '}': 165 currentEntity.type=Entity.SYNTAXERROR; 166 return currentEntity; 167 168 case ';': 169 if(currentEntity.type==Entity.CLASSHEAD || 170 currentEntity.type==Entity.METHODHEAD) { 171 backtrackWhileWhitespace(); 172 currentEntity.endPos=pos; 173 } else { 174 currentEntity.type=Entity.VARIABLE; 175 pos++; 176 currentEntity.endPos=pos; 177 } 178 179 return currentEntity; 180 181 case '=': 182 currentEntity.type=Entity.VARIABLE; 183 pos++; 184 break; 185 186 case '\"': 187 findEndOfString(); 188 break; 189 190 case '\'': 191 findEndOfChar(); 192 break; 193 194 default: 195 pos++; 196 break; 197 } 198 } 199 200 201 if(currentEntity.contentStartPos==pos) { 202 currentEntity.type=Entity.ENDOFFILE; 203 currentEntity.endPos=pos; 204 }else{ 205 currentEntity.type=Entity.SYNTAXERROR; 206 } 207 return currentEntity; 208 } 209 210 private boolean isClass() { 211 if(file.regionMatches(pos+1,"lass",0,4)) { 212 if(currentEntity.contentStartPos==pos || 213 Character.isWhitespace(file.charAt(pos-1))) { 214 if(pos+5<file.length() && 215 Character.isWhitespace(file.charAt(pos+5))) { 216 return true; 217 } 218 } 219 220 } 221 return false; 222 } 223 224 private boolean isInterface() { 225 if(file.regionMatches(pos+1,"nterface",0,8)) { 226 if(currentEntity.contentStartPos==pos || 227 Character.isWhitespace(file.charAt(pos-1))) { 228 if(pos+9<file.length() && 229 Character.isWhitespace(file.charAt(pos+9))) { 230 return true; 231 } 232 } 233 } 234 return false; 235 } 236 237 private boolean isImport() { 238 if(file.regionMatches(pos+1,"mport",0,5) && 239 pos+6<file.length() && 240 Character.isWhitespace(file.charAt(pos+6))) { 241 return true; 242 }else{ 243 return false; 244 } 245 } 246 247 248 private boolean isPackage() { 249 if(file.regionMatches(pos+1,"ackage",0,6) && 250 pos+7<file.length() && 251 Character.isWhitespace(file.charAt(pos+7))) { 252 return true; 253 }else{ 254 return false; 255 } 256 } 257 258 259 260 private boolean findClosingCurl() { 261 char[] chars={ '{' , '}' }; 262 return findClosingPair(chars); 263 } 264 265 /** Returnerar true om slutparentes hittades */ 266 private boolean findClosingParenthesis() { 267 char[] chars={ '(' , ')' }; 268 return findClosingPair(chars); 269 } 270 271 private boolean findClosingPair(char[] chars) { 272 int diff=1; //inneh?åller en { 273 char first; 274 275 pos++; 276 277 while(diff!=0) { 278 if(pos>=file.length()) { 279 return false; 280 } 281 first = findFirst(chars); 282 if(first == chars[0]) diff++; 283 else if(first == chars[1]) diff--; 284 else pos++; 285 286 } 287 return true; 288 } 289 290 291 292 private void eatWhitespace() { 293 while(pos<file.length()) { 294 if(Character.isWhitespace(file.charAt(pos))) { 295 pos++; 296 }else{ 297 return; 298 } 299 } 300 } 301 302 private void backtrackWhileWhitespace() { 303 pos--; 304 while(pos>=0) { 305 if(Character.isWhitespace(file.charAt(pos))) { 306 pos--; 307 }else{ 308 pos++; 309 return; 310 } 311 } 312 } 313 314 315 private void moveBeyondSemicolon() { 316 char[] chars={';'}; 317 pos++; 318 findFirst(chars); 319 } 320 321 322 /** Assumes start-of-comment is at current position */ 323 private void findEndOfComment() { 324 int nextPos=file.indexOf("*/",pos+2); 325 if(nextPos>=0) { 326 pos=nextPos+2; 327 } 328 /* @todo fel om doc-slut inte hittades */ 329 } 330 331 /** Assumes an " at current position and searches from next position */ 332 private void findEndOfString() { 333 pos++; 334 while(pos<file.length()) { 335 if(file.charAt(pos) == '\\') { 336 pos+=2; 337 continue; 338 }else if(file.charAt(pos) == '\"') { 339 pos++; 340 return; 341 }else { 342 pos++; 343 } 344 } 345 /* @todo Skicka tillbaka syntax error */ 346 System.out.println("Syntax error, string not closed"); 347 } 348 349 350 /** S?öker efter den f?örsta f?örekomsten av n?ågon av tecknen i chars. 351 * Ignorerar str?ängar, char och kommentarer. 352 * Bra inneh?åll i chars = \n \r \f { } ; 353 * Returnerar funnet tecken och s?ätter positionen till efter tecknet. 354 */ 355 private char findFirst(char[] chars) { 356 char c; 357 while(pos<file.length()) { 358 c=file.charAt(pos); 359 switch(c) { 360 case '\"': 361 findEndOfString(); 362 break; 363 case '\'': 364 findEndOfChar(); 365 break; 366 case '/': 367 eatComment(); 368 break; 369 default: 370 for(int i=0;i<chars.length;i++) { 371 if(c==chars[i]) { 372 pos++; 373 return c; 374 } 375 } 376 pos++; 377 break; 378 } 379 380 } 381 /* @todo ska returnera syntax error */ 382 System.out.println("Error finding characters"); 383 return '\0'; 384 } 385 386 /** Assumes that the char at the current position is / 387 * Observe that this method makes changes to currentEntity! 388 */ 389 private void eatComment() { 390 if(pos+1<file.length()) { 391 if(file.charAt(pos+1) == '/') { 392 findLineEnd(); 393 currentEntity.endPos=pos; 394 currentEntity.type=Entity.EXTRA; 395 return; 396 } else if(file.charAt(pos+1) == '*') { 397 if(pos+2<file.length() && file.charAt(pos+2) == '*') { 398 findEndOfComment(); 399 currentEntity.endPos=pos; 400 currentEntity.type=Entity.DOCCOMMENT; 401 return; 402 } else { 403 //Ingen doc-kommentar 404 findEndOfComment(); 405 currentEntity.endPos=pos; 406 currentEntity.type=Entity.EXTRA; 407 return; 408 } 409 } 410 } 411 currentEntity.type=Entity.SYNTAXERROR; 412 } 413 414 415 416 /** Assumes a char at current position */ 417 private void findEndOfChar() { 418 pos++; 419 while(pos<file.length()) { 420 if(file.charAt(pos) == '\\') { 421 pos+=2; 422 continue; 423 }else if(file.charAt(pos) == '\'') { 424 pos++; 425 return; 426 }else { 427 pos++; 428 } 429 } 430 System.out.println("Syntax error, char not closed"); 431 } 432 433 private void findLineEnd() { 434 pos+=2; 435 436 if(pos>=file.length()) { return; } 437 438 int min=file.indexOf('\n',pos); 439 if(min<0) min=Integer.MAX_VALUE; 440 441 int p=file.indexOf('\r',pos); 442 if(p<min && p>=0) min=p; 443 444 p=file.indexOf('\f',pos); 445 if(p<min && p>=0) min=p; 446 447 if(min!=Integer.MAX_VALUE) { 448 pos=min+1; 449 }else{ 450 /* @todo returnera syntax error */ 451 System.out.println("Serious file error"); 452 System.exit(0); 453 } 454 } 455 } 456 457 458