1   /*
2    *  Coreferencer.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Marin Dimitrov, 18/Dec/2001
12   *
13   *  $Id: Coreferencer.java,v 1.14 2002/03/07 16:55:26 marin Exp $
14   */
15  
16  package gate.creole.coref;
17  
18  import java.util.*;
19  
20  import junit.framework.*;
21  
22  import gate.*;
23  import gate.creole.*;
24  import gate.util.*;
25  
26  public class Coreferencer extends AbstractLanguageAnalyser
27                            implements ProcessingResource{
28  
29    public static final String COREF_DOCUMENT_PARAMETER_NAME = "document";
30  
31    public static final String COREF_ANN_SET_PARAMETER_NAME = "annotationSetName";
32  
33    public static final String COREF_TYPE_FEATURE_NAME = "ENTITY_MENTION_TYPE";
34    public static final String COREF_ANTECEDENT_FEATURE_NAME = "antecedent_offset";
35  
36    /** --- */
37    private static final boolean DEBUG = false;
38    /** --- */
39    private PronominalCoref pronominalModule;
40  
41    /** --- */
42    public Coreferencer() {
43      this.pronominalModule = new PronominalCoref();
44    }
45  
46  
47    /** Initialise this resource, and return it. */
48    public Resource init() throws ResourceInstantiationException {
49  
50      Resource result = super.init();
51  
52      //load all submodules
53      this.pronominalModule.init();
54  
55      return result;
56    } // init()
57  
58  
59    /**
60     * Reinitialises the processing resource. After calling this method the
61     * resource should be in the state it is after calling init.
62     * If the resource depends on external resources (such as rules files) then
63     * the resource will re-read those resources. If the data used to create
64     * the resource has changed since the resource has been created then the
65     * resource will change too after calling reInit().
66    */
67    public void reInit() throws ResourceInstantiationException {
68      init();
69    } // reInit()
70  
71  
72    /** Set the document to run on. */
73    public void setDocument(Document newDocument) {
74  
75  //    Assert.assertNotNull(newDocument);
76  
77      this.pronominalModule.setDocument(newDocument);
78      super.setDocument(newDocument);
79    }
80  
81  
82    /** --- */
83    public void setAnnotationSetName(String annotationSetName) {
84      this.pronominalModule.setAnnotationSetName(annotationSetName);
85    }
86  
87    /** --- */
88    public String getAnnotationSetName() {
89      return this.pronominalModule.getAnnotationSetName();
90    }
91  
92    /**
93     * This method runs the coreferencer. It assumes that all the needed parameters
94     * are set. If they are not, an exception will be fired.
95     */
96    public void execute() throws ExecutionException {
97  
98      this.pronominalModule.execute();
99      generateCorefChains();
100   }
101 
102   /** --- */
103   private void generateCorefChains() throws GateRuntimeException{
104 
105     //1. get the resolved corefs
106     HashMap ana2ant = this.pronominalModule.getResolvedAnaphora();
107 
108     //2. get the outout annotation set
109     String asName = getAnnotationSetName();
110     AnnotationSet outputSet = null;
111 
112     if (null == asName || asName.equals("")) {
113       outputSet = getDocument().getAnnotations();
114     }
115     else {
116       outputSet = getDocument().getAnnotations(asName);
117     }
118 
119     //3. generate new annotations
120     Iterator it = ana2ant.entrySet().iterator();
121     while (it.hasNext()) {
122       Map.Entry currLink = (Map.Entry)it.next();
123       Annotation anaphor = (Annotation)currLink.getKey();
124       Annotation antecedent = (Annotation)currLink.getValue();
125 
126       if (DEBUG) {
127         AnnotationSet corefSet = getDocument().getAnnotations("COREF");
128         Long antOffset = new Long(0);
129 
130         if (null != antecedent) {
131           antOffset = antecedent.getStartNode().getOffset();
132         }
133 
134         FeatureMap features = new SimpleFeatureMapImpl();
135         features.put("antecedent",antOffset);
136         corefSet.add(anaphor.getStartNode(),anaphor.getEndNode(),"COREF",features);
137       }
138 
139       //do we have antecedent?
140       if (null == antecedent) {
141         continue;
142       }
143 
144       //get the ortho-matches of the antecedent
145       List matches = (List)antecedent.getFeatures().
146         get(ANNOTATION_COREF_FEATURE_NAME);
147       if (null == matches) {
148         matches = new ArrayList();
149         matches.add(antecedent.getId());
150         antecedent.getFeatures().
151           put(ANNOTATION_COREF_FEATURE_NAME,matches);
152         //check if the document has a list of matches
153         //if yes, simply add the new list to it
154         //if not, create it and add the list of matches to it
155         if (document.getFeatures().containsKey(
156             DOCUMENT_COREF_FEATURE_NAME)) {
157           Map matchesMap = (Map) document.getFeatures().get(
158                                 DOCUMENT_COREF_FEATURE_NAME);
159           List matchesList = (List) matchesMap.get(getAnnotationSetName());
160           if (matchesList == null) {
161             matchesList = new ArrayList();
162             matchesMap.put(getAnnotationSetName(), matchesList);
163           }
164           matchesList.add(matches);
165         } else {
166           Map matchesMap = new HashMap();
167             List matchesList = new ArrayList();
168             matchesMap.put(getAnnotationSetName(), matchesList);
169             matchesList.add(matches);
170         }//if else
171       }//if matches == null
172 
173       FeatureMap features = new SimpleFeatureMapImpl();
174       features.put(COREF_TYPE_FEATURE_NAME,"PRONOUN");
175       features.put(ANNOTATION_COREF_FEATURE_NAME,matches);
176       features.put(COREF_ANTECEDENT_FEATURE_NAME,
177                    antecedent.getStartNode().getOffset());
178 
179       Integer annID = outputSet.add(anaphor.getStartNode(),
180                                     anaphor.getEndNode(),
181                                     antecedent.getType(),
182                                     features);
183       matches.add(annID);
184     }
185   }
186 
187 }