1   /*
2    *  DFSMState.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Valentin Tablan, 27/06/2000
12   *
13   *  $Id: DFSMState.java,v 1.14 2001/09/26 11:41:05 marin Exp $
14   */
15  
16   /*
17      modified by OntoText, Aug 29
18  
19   */
20  
21  package gate.creole.tokeniser;
22  
23  import java.util.*;
24  
25  import gate.util.*;
26  
27  /** Implements a state of the deterministic finite state machine of the
28    * tokeniser.
29    * It differs from {@link FSMState FSMState} by the definition of the
30    * transition function which in this case maps character types to other states
31    * as oposed to the transition function from FSMState which maps character
32    * types to sets of states, hence the nondeterministic character.
33    * {@see FSMState FSMState}
34    */
35  class DFSMState implements java.io.Serializable { //extends FSMState{
36  
37    /** Debug flag */
38    private static final boolean DEBUG = false;
39  
40    /** Constructs a new DFSMState object and adds it to the list of deterministic
41      * states of the {@link DefaultTokeniser DefaultTokeniser} provided as owner.
42      * @param owner a {@link DefaultTokeniser DefaultTokeniser} object
43      */
44    public DFSMState(SimpleTokeniser owner){
45      myIndex = index++;
46      owner.dfsmStates.add(this);
47    }
48  
49    /** Adds a new mapping in the transition function of this state
50      * @param type the UnicodeType for this mapping
51      * @state the next state of the FSM Machine when a character of type type
52      * is read from the input.
53      */
54    void put(UnicodeType type, DFSMState state){
55      put(type.type, state);
56    } // put(UnicodeType type, DFSMState state)
57  
58    /** Adds a new mapping using the actual index in the internal array.
59      * This method is for internal use only. Use
60      * {@link #put(gate.creole.tokeniser.UnicodeType,
61      *             gate.creole.tokeniser.DFSMState)} instead.
62      */
63    void put(int index, DFSMState state){
64      transitionFunction[index] = state;
65    } // put(int index, DFSMState state)
66  
67    /** This method is used to access the transition function of this state.
68      * @param type the Unicode type identifier as the corresponding static value
69      * on {@link java.lang.Character}
70      */
71    DFSMState next(int type){//UnicodeType type){
72      return transitionFunction[type];
73    } // next
74  
75    /** Returns a GML (Graph Modelling Language) representation of the edges
76      * emerging from this state
77      */
78    String getEdgesGML(){
79      ///String res = "";
80      //OT
81      StringBuffer res = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
82      Set nextSet;
83      Iterator nextSetIter;
84      DFSMState nextState;
85  
86      for(int i = 0; i< transitionFunction.length; i++){
87        nextState = transitionFunction[i];
88        if(null != nextState){
89          /*
90          res += "edge [ source " + myIndex +
91          " target " + nextState.getIndex() +
92          " label \"";
93          res += SimpleTokeniser.typeMnemonics[i];
94          res += "\" ]\n";
95          */
96          //OT
97          res.append("edge [ source ");
98          res.append(myIndex);
99          res.append(" target ");
100         res.append(nextState.getIndex());
101         res.append(" label \"");
102         res.append(SimpleTokeniser.typeMnemonics[i]);
103         res.append("\" ]\n");
104       }
105     };
106     return res.toString();
107   } // getEdgesGML
108 
109   /** Builds the token description for the token that will be generated when
110     * this <b>final</b> state will be reached and the action associated with it
111     * will be fired.
112     * See also {@link #setRhs(String)}.
113     */
114   void buildTokenDesc() throws TokeniserException{
115     String ignorables = " \t\f";
116     String token = null,
117            type = null,
118            attribute = null,
119            value = null
120            ///prefix = null,
121            ///read =""
122            ;
123     //OT
124     StringBuffer prefix = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
125     StringBuffer read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
126 
127     LinkedList attributes = new LinkedList(),
128                values = new LinkedList();
129     StringTokenizer mainSt =
130       new StringTokenizer(rhs, ignorables + "\\\";=", true);
131 
132     int descIndex = 0;
133     //phase means:
134     //0 == looking for type;
135     //1 == looking for attribute;
136     //2 == looking for value;
137     //3 == write the attr/value pair
138     int phase = 0;
139 
140     while(mainSt.hasMoreTokens()) {
141       token = SimpleTokeniser.skipIgnoreTokens(mainSt);
142 
143       if(token.equals("\\")){
144         if(null == prefix)
145             ///prefix = mainSt.nextToken();
146         //OT
147             prefix = new StringBuffer(mainSt.nextToken());
148         else ///prefix += mainSt.nextToken();
149         //OT
150             prefix.append(mainSt.nextToken());
151         continue;
152       } else if(null != prefix) {
153         ///read += prefix;
154         //OT
155         read.append(prefix);
156         prefix = null;
157       }
158 
159       if(token.equals("\"")){
160         ///read = mainSt.nextToken("\"");
161         //OT
162         read = new StringBuffer(mainSt.nextToken("\""));
163         if(read.equals("\"")) ///read = "";
164             read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
165         else {
166           //delete the remaining enclosing quote and restore the delimiters
167           mainSt.nextToken(ignorables + "\\\";=");
168         }
169 
170       } else if(token.equals("=")) {
171 
172         if(phase == 1){
173           ///attribute = read;
174           //OT
175           attribute = read.toString();
176           ///read = "";
177           //OT
178           read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
179           phase = 2;
180         }else throw new TokeniserException("Invalid attribute format: " +
181                                            read);
182       } else if(token.equals(";")) {
183         if(phase == 0){
184           ///type = read;
185           type = read.toString();
186           ///read = "";
187           read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
188           //Out.print("Type: " + type);
189           attributes.addLast(type);
190           values.addLast("");
191           phase = 1;
192         } else if(phase == 2) {
193           ///value = read;
194           value = read.toString();
195           ///read = "";
196           read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
197           phase = 3;
198         } else throw new TokeniserException("Invalid value format: " +
199                                            read);
200       } else ///read += token;
201             read.append(token);
202 
203       if(phase == 3) {
204         // Out.print("; " + attribute + "=" + value);
205         attributes.addLast(attribute);
206         values.addLast(value);
207         phase = 1;
208       }
209     }
210     //Out.println();
211     if(attributes.size() < 1)
212       throw new InvalidRuleException("Invalid right hand side " + rhs);
213     tokenDesc = new String[attributes.size()][2];
214 
215     for(int i = 0; i < attributes.size(); i++) {
216       tokenDesc[i][0] = (String)attributes.get(i);
217       tokenDesc[i][1] = (String)values.get(i);
218     }
219 
220     // for(int i = 0; i < attributes.size(); i++){
221     //    Out.println(tokenDesc[i][0] + "=" +
222     //                  tokenDesc[i][1]);
223     // }
224   } // buildTokenDesc
225 
226   /** Sets the right hand side associated with this state. The RHS is
227     * represented as a string value that will be parsed by the
228     * {@link #buildTokenDesc()} method being converted in a table of strings
229     * with 2 columns and as many lines as necessary.
230     * @param rhs the RHS string
231     */
232   void setRhs(String rhs) { this.rhs = rhs; }
233 
234   /** Returns the RHS string*/
235   String getRhs(){return rhs;}
236 
237   /** Checks whether this state is a final one*/
238   boolean isFinal() { return (null != rhs); }
239 
240   /** Returns the unique ID of this state.*/
241   int getIndex() { return myIndex; }
242 
243   /** Returns the token description associated with this state. This description
244     * is built by {@link #buildTokenDesc()} method and consists of a table of
245     * strings having two columns.
246     * The first line of the table contains the annotation type on the first
247     * position and nothing on the second.
248     * Each line after the first one contains a attribute on the first position
249     * and its associated value on the second.
250     */
251   String[][] getTokenDesc() {
252     return tokenDesc;
253   }
254 
255   /** A table of strings describing an annotation.
256     * The first line of the table contains the annotation type on the first
257     * position and nothing on the second.
258     * Each line after the first one contains a attribute on the first position
259     * and its associated value on the second.
260     */
261   String[][] tokenDesc;
262 
263   /** The transition function of this state.
264     */
265   DFSMState[] transitionFunction = new DFSMState[SimpleTokeniser.maxTypeId];
266 
267   /** The string of the RHS of the rule from which the token
268     * description is built
269     */
270   String rhs;
271 
272   /** The unique index of this state*/
273   int myIndex;
274 
275   /** Used to generate unique indices for all the objects of this class*/
276   static int index;
277 
278   static {
279     index = 0;
280   }
281 
282 } // class DFSMState
283