|
Coreferencer |
|
1 /* 2 * Coreferencer.java 3 * 4 * Copyright (c) 1998-2001, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Marin Dimitrov, 18/Dec/2001 12 * 13 * $Id: Coreferencer.java,v 1.14 2002/03/07 16:55:26 marin Exp $ 14 */ 15 16 package gate.creole.coref; 17 18 import java.util.*; 19 20 import junit.framework.*; 21 22 import gate.*; 23 import gate.creole.*; 24 import gate.util.*; 25 26 public class Coreferencer extends AbstractLanguageAnalyser 27 implements ProcessingResource{ 28 29 public static final String COREF_DOCUMENT_PARAMETER_NAME = "document"; 30 31 public static final String COREF_ANN_SET_PARAMETER_NAME = "annotationSetName"; 32 33 public static final String COREF_TYPE_FEATURE_NAME = "ENTITY_MENTION_TYPE"; 34 public static final String COREF_ANTECEDENT_FEATURE_NAME = "antecedent_offset"; 35 36 /** --- */ 37 private static final boolean DEBUG = false; 38 /** --- */ 39 private PronominalCoref pronominalModule; 40 41 /** --- */ 42 public Coreferencer() { 43 this.pronominalModule = new PronominalCoref(); 44 } 45 46 47 /** Initialise this resource, and return it. */ 48 public Resource init() throws ResourceInstantiationException { 49 50 Resource result = super.init(); 51 52 //load all submodules 53 this.pronominalModule.init(); 54 55 return result; 56 } // init() 57 58 59 /** 60 * Reinitialises the processing resource. After calling this method the 61 * resource should be in the state it is after calling init. 62 * If the resource depends on external resources (such as rules files) then 63 * the resource will re-read those resources. If the data used to create 64 * the resource has changed since the resource has been created then the 65 * resource will change too after calling reInit(). 66 */ 67 public void reInit() throws ResourceInstantiationException { 68 init(); 69 } // reInit() 70 71 72 /** Set the document to run on. */ 73 public void setDocument(Document newDocument) { 74 75 // Assert.assertNotNull(newDocument); 76 77 this.pronominalModule.setDocument(newDocument); 78 super.setDocument(newDocument); 79 } 80 81 82 /** --- */ 83 public void setAnnotationSetName(String annotationSetName) { 84 this.pronominalModule.setAnnotationSetName(annotationSetName); 85 } 86 87 /** --- */ 88 public String getAnnotationSetName() { 89 return this.pronominalModule.getAnnotationSetName(); 90 } 91 92 /** 93 * This method runs the coreferencer. It assumes that all the needed parameters 94 * are set. If they are not, an exception will be fired. 95 */ 96 public void execute() throws ExecutionException { 97 98 this.pronominalModule.execute(); 99 generateCorefChains(); 100 } 101 102 /** --- */ 103 private void generateCorefChains() throws GateRuntimeException{ 104 105 //1. get the resolved corefs 106 HashMap ana2ant = this.pronominalModule.getResolvedAnaphora(); 107 108 //2. get the outout annotation set 109 String asName = getAnnotationSetName(); 110 AnnotationSet outputSet = null; 111 112 if (null == asName || asName.equals("")) { 113 outputSet = getDocument().getAnnotations(); 114 } 115 else { 116 outputSet = getDocument().getAnnotations(asName); 117 } 118 119 //3. generate new annotations 120 Iterator it = ana2ant.entrySet().iterator(); 121 while (it.hasNext()) { 122 Map.Entry currLink = (Map.Entry)it.next(); 123 Annotation anaphor = (Annotation)currLink.getKey(); 124 Annotation antecedent = (Annotation)currLink.getValue(); 125 126 if (DEBUG) { 127 AnnotationSet corefSet = getDocument().getAnnotations("COREF"); 128 Long antOffset = new Long(0); 129 130 if (null != antecedent) { 131 antOffset = antecedent.getStartNode().getOffset(); 132 } 133 134 FeatureMap features = new SimpleFeatureMapImpl(); 135 features.put("antecedent",antOffset); 136 corefSet.add(anaphor.getStartNode(),anaphor.getEndNode(),"COREF",features); 137 } 138 139 //do we have antecedent? 140 if (null == antecedent) { 141 continue; 142 } 143 144 //get the ortho-matches of the antecedent 145 List matches = (List)antecedent.getFeatures(). 146 get(ANNOTATION_COREF_FEATURE_NAME); 147 if (null == matches) { 148 matches = new ArrayList(); 149 matches.add(antecedent.getId()); 150 antecedent.getFeatures(). 151 put(ANNOTATION_COREF_FEATURE_NAME,matches); 152 //check if the document has a list of matches 153 //if yes, simply add the new list to it 154 //if not, create it and add the list of matches to it 155 if (document.getFeatures().containsKey( 156 DOCUMENT_COREF_FEATURE_NAME)) { 157 Map matchesMap = (Map) document.getFeatures().get( 158 DOCUMENT_COREF_FEATURE_NAME); 159 List matchesList = (List) matchesMap.get(getAnnotationSetName()); 160 if (matchesList == null) { 161 matchesList = new ArrayList(); 162 matchesMap.put(getAnnotationSetName(), matchesList); 163 } 164 matchesList.add(matches); 165 } else { 166 Map matchesMap = new HashMap(); 167 List matchesList = new ArrayList(); 168 matchesMap.put(getAnnotationSetName(), matchesList); 169 matchesList.add(matches); 170 }//if else 171 }//if matches == null 172 173 FeatureMap features = new SimpleFeatureMapImpl(); 174 features.put(COREF_TYPE_FEATURE_NAME,"PRONOUN"); 175 features.put(ANNOTATION_COREF_FEATURE_NAME,matches); 176 features.put(COREF_ANTECEDENT_FEATURE_NAME, 177 antecedent.getStartNode().getOffset()); 178 179 Integer annID = outputSet.add(anaphor.getStartNode(), 180 anaphor.getEndNode(), 181 antecedent.getType(), 182 features); 183 matches.add(annID); 184 } 185 } 186 187 }
|
Coreferencer |
|