001    package plugins.JavaRolePluginModule;
002    
003    import java.util.*;
004    
005    /** Java source code parser
006     *  @author Mikael S?ödergren, modified and 
007     *  @version 0.05
008     *
009     *  Fixed:
010     *  1. The reduction of matching curls and parenthesis works!
011     *  2. The generation of a tree
012     *  3. Added support for the constructor static { ... }
013     *  4. Smarter detection of class, interface, import and package
014     */
015    public class Parser {
016      private Entity currentEntity; 
017      private Entity previousEntity;
018      private String file;
019      private int pos;
020    
021      public Parser(String file) {
022        this.file=file;
023        pos=0;
024      }
025    
026      /** Att g?öra: M?åste hantera ov?äntade filslut */
027    
028      public Entity getNextEntity() {
029        previousEntity=currentEntity;
030        currentEntity=new Entity(file);
031        currentEntity.startPos=pos;
032        eatWhitespace();
033        currentEntity.contentStartPos=pos;
034    
035        /* Check if this is the first call of this method */
036        /*if(previousEntity==null) {
037          eatWhitespace();
038          if(currentEntity.startPos!=pos) {
039          currentEntity.type=Entity.EXTRA;
040          currentEntity.endPos=currentEntity.startPos;
041          currentEntity.whitespaceEndPos=pos;
042          return currentEntity;
043          }
044          }*/
045        if(pos<file.length()) {
046          char c=file.charAt(pos);
047          switch(c) {
048          case 'i':
049          case 'p':
050            if(isImport() || isPackage()) {
051              moveBeyondSemicolon();
052              currentEntity.endPos=pos;
053              currentEntity.type=Entity.EXTRA;
054              return currentEntity;
055            }
056            break;
057          case 's':
058            if(file.regionMatches(pos+1,"tatic",0,5)) {
059              pos+=6;
060              eatWhitespace();
061              if(pos<file.length() && file.charAt(pos)=='{') {
062                findClosingCurl();
063                currentEntity.endPos=pos;
064                currentEntity.type=Entity.EXTRA;
065                return currentEntity;
066              }
067            }
068            break;
069          case '{':
070            if(previousEntity.type==Entity.CLASSHEAD) {
071              pos++;
072              currentEntity.endPos=pos;
073              currentEntity.type=Entity.CLASSBODY;
074              return currentEntity;
075            }else if(previousEntity.type==Entity.METHODHEAD) {
076              findClosingCurl();
077              currentEntity.endPos=pos;
078              currentEntity.type=Entity.METHODBODY;
079              return currentEntity;
080            }else {
081              currentEntity.type=Entity.SYNTAXERROR;
082              return currentEntity;
083            }
084    
085          case '/':
086            eatComment();
087            return currentEntity;
088    
089          case ';':
090            if(previousEntity.type==Entity.CLASSHEAD ||
091               previousEntity.type==Entity.METHODHEAD) {
092              pos++;
093              currentEntity.type=Entity.SEMICOLON;
094              currentEntity.endPos=pos;
095              return currentEntity;
096            } else {
097              pos++;
098              currentEntity.type=Entity.EXTRA;
099              currentEntity.endPos=pos;
100            }
101            break;
102          case '}':
103            pos++;
104            currentEntity.type=Entity.ENDOFCLASSBODY;
105            currentEntity.endPos=pos;
106            return currentEntity;
107          }
108        }
109                
110        
111    
112        while(pos<file.length()) {
113          char c=file.charAt(pos);
114    
115          switch(c) {
116          case 'c':
117            if(isClass()) {
118              currentEntity.type=Entity.CLASSHEAD;
119              pos+=6;
120            } else {
121              pos++;
122            }
123            break;
124    
125          case 'i':
126            if(isInterface()) {
127              currentEntity.type=Entity.CLASSHEAD;
128              pos+=10;
129            } else {
130              pos++;
131            }
132            break;
133    
134          case '(':
135            /* todo unders?ök att currententity inte ?är CLASSHEAD */
136            if(findClosingParenthesis()) {
137              if(currentEntity.type!=Entity.VARIABLE) {
138                currentEntity.type=Entity.METHODHEAD;
139              }
140            }else{
141              currentEntity.type=Entity.SYNTAXERROR;
142              return currentEntity;
143            }
144            break;
145    
146    
147          case '{':
148            if(currentEntity.type==Entity.CLASSHEAD) {
149              backtrackWhileWhitespace();
150              currentEntity.endPos=pos;
151              return currentEntity;
152            }else if(currentEntity.type==Entity.METHODHEAD) {
153              backtrackWhileWhitespace();
154              currentEntity.endPos=pos;
155              return currentEntity;
156            }else if(currentEntity.type==Entity.VARIABLE) {
157              findClosingCurl();
158            } else {
159              currentEntity.type=Entity.SYNTAXERROR;
160              return currentEntity;
161            }
162            break;
163    
164          case '}':
165            currentEntity.type=Entity.SYNTAXERROR;
166            return currentEntity;
167            
168          case ';':
169            if(currentEntity.type==Entity.CLASSHEAD ||
170               currentEntity.type==Entity.METHODHEAD) {
171              backtrackWhileWhitespace();
172              currentEntity.endPos=pos;
173            } else {
174              currentEntity.type=Entity.VARIABLE;
175              pos++;
176              currentEntity.endPos=pos;
177            }
178    
179            return currentEntity;
180    
181          case '=':
182            currentEntity.type=Entity.VARIABLE;
183            pos++;
184            break;
185              
186          case '\"':
187            findEndOfString();
188            break;
189    
190          case '\'':
191            findEndOfChar();
192            break;
193    
194          default:
195            pos++;
196            break;
197          }
198        }
199    
200    
201        if(currentEntity.contentStartPos==pos) {
202          currentEntity.type=Entity.ENDOFFILE;
203          currentEntity.endPos=pos;
204        }else{
205          currentEntity.type=Entity.SYNTAXERROR;
206        }
207        return currentEntity;
208      }
209    
210      private boolean isClass() {
211        if(file.regionMatches(pos+1,"lass",0,4)) {
212          if(currentEntity.contentStartPos==pos ||
213             Character.isWhitespace(file.charAt(pos-1))) {
214            if(pos+5<file.length() &&
215               Character.isWhitespace(file.charAt(pos+5))) {
216              return true;
217            }
218          }
219    
220        } 
221        return false;
222      }
223    
224      private boolean isInterface() {
225        if(file.regionMatches(pos+1,"nterface",0,8)) {
226          if(currentEntity.contentStartPos==pos ||
227             Character.isWhitespace(file.charAt(pos-1))) {
228            if(pos+9<file.length() &&
229               Character.isWhitespace(file.charAt(pos+9))) {
230              return true;
231            }
232          }
233        }
234        return false;
235      }
236    
237      private boolean isImport() {
238        if(file.regionMatches(pos+1,"mport",0,5) &&
239           pos+6<file.length() &&
240           Character.isWhitespace(file.charAt(pos+6))) {
241          return true;
242        }else{
243          return false;
244        }
245      }
246    
247    
248      private boolean isPackage() {
249        if(file.regionMatches(pos+1,"ackage",0,6) &&
250           pos+7<file.length() &&
251           Character.isWhitespace(file.charAt(pos+7))) {
252          return true;
253        }else{
254          return false;
255        }
256      }
257    
258    
259    
260      private boolean findClosingCurl() {
261        char[] chars={ '{' , '}' };
262        return findClosingPair(chars);
263      }
264    
265      /** Returnerar true om slutparentes hittades */
266      private boolean findClosingParenthesis() {
267        char[] chars={ '(' , ')' };
268        return findClosingPair(chars);
269      }
270    
271      private boolean findClosingPair(char[] chars) {
272        int diff=1; //inneh?åller en {
273        char first;
274    
275        pos++;
276    
277        while(diff!=0) {
278          if(pos>=file.length()) {
279            return false;
280          }
281          first = findFirst(chars);
282          if(first == chars[0]) diff++;
283          else if(first == chars[1]) diff--;
284          else pos++;
285    
286        }
287        return true;
288      }
289    
290    
291    
292      private void eatWhitespace() {
293        while(pos<file.length()) {
294          if(Character.isWhitespace(file.charAt(pos))) {
295            pos++;
296          }else{
297            return;
298          }
299        }
300      }
301    
302      private void backtrackWhileWhitespace() {
303        pos--;
304        while(pos>=0) {
305          if(Character.isWhitespace(file.charAt(pos))) {
306            pos--;
307          }else{
308            pos++;
309            return;
310          }
311        }
312      }
313    
314    
315      private void moveBeyondSemicolon() {
316        char[] chars={';'};
317        pos++;
318        findFirst(chars);
319      }
320    
321    
322      /** Assumes start-of-comment is at current position */
323      private void findEndOfComment() {
324        int nextPos=file.indexOf("*/",pos+2);
325        if(nextPos>=0) {
326          pos=nextPos+2;
327        }
328        /* @todo fel om doc-slut inte hittades */
329      }
330    
331      /** Assumes an " at current position and searches from next position */
332      private void findEndOfString() {
333        pos++;
334        while(pos<file.length()) {
335          if(file.charAt(pos) == '\\') {
336            pos+=2;
337            continue;
338          }else if(file.charAt(pos) == '\"') {
339            pos++;
340            return;
341          }else {
342            pos++;
343          }
344        }
345        /* @todo Skicka tillbaka syntax error */
346        System.out.println("Syntax error, string not closed");
347      }
348    
349    
350      /** S?öker efter den f?örsta f?örekomsten av n?ågon av tecknen i chars.
351       *  Ignorerar str?ängar, char och kommentarer.
352       *  Bra inneh?åll i chars = \n \r \f { } ;
353       *  Returnerar funnet tecken och s?ätter positionen till efter tecknet.
354       */
355      private char findFirst(char[] chars) {
356        char c;
357        while(pos<file.length()) {
358          c=file.charAt(pos);
359          switch(c) {
360          case '\"':
361            findEndOfString();
362            break;
363          case '\'':
364            findEndOfChar();
365            break;
366          case '/':
367            eatComment();
368            break;
369          default:
370            for(int i=0;i<chars.length;i++) {
371              if(c==chars[i]) {
372                pos++;
373                return c;
374              }
375            }
376            pos++;
377            break;
378          }
379    
380        }
381        /* @todo ska returnera syntax error */
382        System.out.println("Error finding characters");
383        return '\0';
384      }
385    
386      /** Assumes that the char at the current position is /
387       *  Observe that this method makes changes to currentEntity!
388       */
389      private void eatComment() {
390        if(pos+1<file.length()) {
391          if(file.charAt(pos+1) == '/') {
392            findLineEnd();
393            currentEntity.endPos=pos;
394            currentEntity.type=Entity.EXTRA;
395            return;
396          } else if(file.charAt(pos+1) == '*') {
397            if(pos+2<file.length() && file.charAt(pos+2) == '*') {
398              findEndOfComment();
399              currentEntity.endPos=pos;
400              currentEntity.type=Entity.DOCCOMMENT;
401              return;
402            } else {
403              //Ingen doc-kommentar
404              findEndOfComment();
405              currentEntity.endPos=pos;
406              currentEntity.type=Entity.EXTRA;
407              return;
408            }
409          }
410        }
411        currentEntity.type=Entity.SYNTAXERROR;
412      }
413    
414    
415    
416      /** Assumes a char at current position */
417      private void findEndOfChar() {
418        pos++;
419        while(pos<file.length()) {
420          if(file.charAt(pos) == '\\') {
421            pos+=2;
422            continue;
423          }else if(file.charAt(pos) == '\'') {
424            pos++;
425            return;
426          }else {
427            pos++;
428          }
429        }
430        System.out.println("Syntax error, char not closed");
431      }
432    
433      private void findLineEnd() {
434        pos+=2;
435    
436        if(pos>=file.length()) { return; }
437    
438        int min=file.indexOf('\n',pos);
439        if(min<0) min=Integer.MAX_VALUE;
440    
441        int p=file.indexOf('\r',pos);
442        if(p<min && p>=0) min=p;
443    
444        p=file.indexOf('\f',pos);
445        if(p<min && p>=0) min=p;
446    
447        if(min!=Integer.MAX_VALUE) {
448          pos=min+1;
449        }else{
450          /* @todo returnera syntax error */
451          System.out.println("Serious file error");
452          System.exit(0);
453        }
454      }
455    }
456    
457    
458