1   
2   /*
3    *  RDFFormatExporter.java
4    *
5    *  Copyright (c) 1998-2004, The University of Sheffield.
6    *
7    *  This file is part of GATE (see http://gate.ac.uk/), and is free
8    *  software, licenced under the GNU Library General Public License,
9    *  Version 2, June 1991 (in the distribution as file licence.html,
10   *  and also available at http://gate.ac.uk/gate/licence.html).
11   *
12   *  Marin Dimitrov, 07/May/2002
13   *
14   *  $Id: RDFFormatExporter.java,v 1.14 2004/07/21 17:10:03 akshay Exp $
15   */
16  
17  package gate.creole;
18  
19  import java.io.FileWriter;
20  import java.io.Writer;
21  import java.net.URL;
22  import java.util.*;
23  
24  import junit.framework.Assert;
25  
26  import com.hp.hpl.jena.daml.*;
27  import com.hp.hpl.jena.daml.common.DAMLModelImpl;
28  import com.hp.hpl.mesa.rdf.jena.common.RDFWriterFImpl;
29  import com.hp.hpl.mesa.rdf.jena.model.RDFWriter;
30  
31  import gate.*;
32  
33  
34  public class RDFFormatExporter extends AbstractLanguageAnalyser {
35  
36    private static final int DAML_EXPORT = 0;
37    private static final int RDF_EXPORT = 1;
38  
39    private static final String[] EXPORT_FORMATS = {"DAML+OIL","RDF"};
40    private static final String[] EXPORT_EXTS = {"daml","rdf"};
41  
42    private static final String ONTOGAZ_CLASS_FEATURE = "class";
43    private static final String ONTOGAZ_ONTOLOGY_FEATURE = "ontology";
44  
45    /** Debug flag */
46    private static final boolean DEBUG = false;
47  
48    private int exportFormat;
49  
50    /** This list of strings represents the entities type that will be exported*/
51    private List exportedTypes = null;
52  
53    private URL exportFilePath = null;
54  
55    private URL ontologyLocation = null;
56  
57    private String annotationSetName = null;
58  
59    public RDFFormatExporter() {
60    }
61  
62    /** Java bean style mutator for exportedTypes */
63    public void setExportedTypes(List anExportedTypesList){
64      exportedTypes = anExportedTypesList;
65    }// setExportedTypes();
66  
67  
68    /** Java bean style accesor for exportedTypes */
69    public List getExportedTypes(){
70      return exportedTypes;
71    }// getExportedTypes()
72  
73    /** Java bean style mutator for exportedTypes */
74    public void setExportFormat(String format){
75  
76      Assert.assertTrue(format.equalsIgnoreCase(EXPORT_FORMATS[DAML_EXPORT]) ||
77                        format.equalsIgnoreCase(EXPORT_FORMATS[RDF_EXPORT]));
78  
79      if (format.equalsIgnoreCase(EXPORT_FORMATS[DAML_EXPORT])) {
80        this.exportFormat = DAML_EXPORT;
81      }
82      else if (format.equalsIgnoreCase(EXPORT_FORMATS[RDF_EXPORT])) {
83        this.exportFormat = RDF_EXPORT;
84      }
85      else {
86        Assert.fail();
87      }
88  
89    }// setExportedTypes();
90  
91    /** Java bean style mutator for exportedTypes */
92    public String getExportFormat() {
93      return EXPORT_FORMATS[this.exportFormat];
94    }// setExportedTypes();
95  
96    /** Java bean style mutator for exportFilePath */
97    public void setExportFilePath(URL anExportFilePath){
98      exportFilePath = anExportFilePath;
99    }// setExportFilePath();
100 
101   /** Java bean style accesor for exportFilePath */
102   public URL getExportFilePath(){
103     return exportFilePath;
104   }// getDtdFileName()
105 
106   /** Java bean style mutator for exportFilePath */
107   public void setOntology(URL _ontologyLocation){
108     ontologyLocation = _ontologyLocation;
109   }// setExportFilePath();
110 
111   /** Java bean style accesor for exportFilePath */
112   public URL getOntology(){
113     return ontologyLocation;
114   }// getDtdFileName()
115 
116   /** Java bean style accessor for annotationSetName */
117   public String getAnnotationSetName() {
118     return annotationSetName;
119   } //getAnnotationSetName
120 
121 
122   /** Java bean style mutator for annotaionSetName */
123   public void setAnnotationSetName(String annotationSetName) {
124     this.annotationSetName = annotationSetName;
125   }
126 
127   /** Initialise this resource, and returns it. */
128   public gate.Resource init() throws ResourceInstantiationException {
129     return this;
130   } // init()
131 
132 
133   /** Run the resource and does the entire export process*/
134   public void execute() throws ExecutionException{
135 
136     // Check if the thing can be run
137     if(document == null) {
138       throw new ExecutionException("No document found to export in APF format!");
139     }
140 
141     /* Commented by Niraj to include support for annotationSetName where all the
142      * annotations should be exported incase exportedTypes is null
143     if (exportedTypes == null) {
144       throw new ExecutionException("No export types found.");
145     }*/
146 
147 //    StringBuffer rdfDoc = new StringBuffer(10*(document.getContent().size().intValue()));
148 
149     String exportFilePathStr = null;
150 
151     if (exportFilePath == null) {
152       exportFilePathStr = new String(document.getSourceUrl().getFile() + "." +
153                                     EXPORT_EXTS[this.exportFormat]);
154     }
155     else {
156       exportFilePathStr = new String(exportFilePath.getPath()+
157                                     "/" +
158                                     document.getName() + "." +
159                                     EXPORT_EXTS[this.exportFormat]);
160     }
161 //System.out.println("export path:" +exportFilePathStr);
162     // Prepare to write into the xmlFile
163     FileWriter  writer = null;
164     try{
165       writer = new FileWriter(exportFilePathStr,false);
166       annotations2ontology(writer);
167       writer.flush();
168       writer.close();
169     }catch (Exception e){
170       throw new ExecutionException(e);
171     }// End try
172 
173   } // execute()
174 
175   private void annotations2ontology(Writer output) throws Exception {
176 
177     DAMLModel ontologyModel, instanceModel;
178     HashMap ontologies = new HashMap();
179     HashMap instanceMatches = new HashMap();
180     HashSet instanceNames = new HashSet();
181 
182       ontologyModel = new DAMLModelImpl();
183       instanceModel = new DAMLModelImpl();
184 
185       Assert.assertNotNull(ontologyModel);
186       Assert.assertNotNull(instanceModel);
187 
188       //final settings of the model
189       DAMLOntology onto = instanceModel.createDAMLOntology("");
190       onto.prop_comment().addValue("autogenerated from GATE RDFFormatExporter");
191       onto.prop_versionInfo().addValue("1.0");
192 
193       Assert.assertNotNull(this.ontologyLocation);
194       ontologyModel.read(this.ontologyLocation.toString());
195 
196       //get a mapping: class name to DAML class
197       HashMap ontologyMap = ontology2hashmap(ontologyModel);
198       Assert.assertNotNull(ontologyMap);
199 
200       //add the mapping to the ontologies hashmap
201       //key is ontology URL as generated by the OntoGaz
202       ontologies.put(this.ontologyLocation.toString(),ontologyMap);
203 
204       if (null == ontologyModel) {
205         throw new ExecutionException("cannot read ontology");
206       }
207 
208       HashMap defaultClasses = new HashMap((int)ontologyModel.size()/5);
209       Iterator itClasses = ontologyModel.listDAMLClasses();
210       while (itClasses.hasNext()) {
211         DAMLClass cls = (DAMLClass)itClasses.next();
212         String className = cls.getLocalName();
213         if (null != className) {
214           defaultClasses.put(className.toLowerCase(),cls);
215         }
216       }
217 
218       //* Addition by Niraj to include AnnotationSet Support */
219       AnnotationSet inputAs = (annotationSetName == null ||
220                                annotationSetName.length() == 0) ?
221                                document.getAnnotations() :
222                                document.getAnnotations(annotationSetName);
223 
224       // see if exportedTypes is null
225       Iterator itTypes = (exportedTypes == null || exportedTypes.size() == 0) ?
226                        inputAs.getAllTypes().iterator() : exportedTypes.iterator();
227 
228       //Iterator itTypes = this.exportedTypes.iterator();
229       // End of addition
230 
231       while (itTypes.hasNext()) {
232 
233         String type = (String)itTypes.next();
234         AnnotationSet as = this.document.getAnnotations().get(type);
235 
236         if (null == as || true == as.isEmpty()) {
237           continue;
238         }
239 
240         Iterator itAnnotations = as.iterator();
241         while (itAnnotations.hasNext()) {
242 
243           Annotation ann = (Annotation)itAnnotations.next();
244           Assert.assertTrue(ann.getType().equals(type));
245 
246           FeatureMap features = ann.getFeatures();
247           String annClass = (String)features.get(ONTOGAZ_CLASS_FEATURE);
248           String annOntology = (String)features.get(ONTOGAZ_ONTOLOGY_FEATURE);
249           DAMLClass damlClass = null;
250 
251           if (null == annClass) {
252             //no ontological info
253             //try to get proper class from the default ontology
254             if (defaultClasses.containsKey(ann.getType().toLowerCase())) {
255               //bingo
256               //we have a class with the name of the annotation's type
257               damlClass = (DAMLClass)defaultClasses.get(ann.getType().toLowerCase());
258               Assert.assertNotNull(damlClass);
259             }
260             else {
261               continue;
262             }
263           }
264           else {
265             //ontological info available
266             //is this a new ontology?
267             if (false == ontologies.containsKey(annOntology)) {
268               //oops, new ontology:
269               //1. create model for it
270               //2. create class name 2 daml class mapping
271               //3. add it to hashmap
272 
273               //1.
274               DAMLModel model = new DAMLModelImpl();
275               model.read(annOntology);
276 
277               //2.
278               //create mapping between class names and DAML classes
279               HashMap name2class = ontology2hashmap(model);
280               Assert.assertNotNull(name2class);
281 
282               //3.
283               ontologies.put(annOntology,model);
284             }
285 
286             //get the class of the annotation
287             damlClass = (DAMLClass)((HashMap)ontologies.get(annOntology)).get(annClass);
288             Assert.assertNotNull(damlClass);
289           }
290 
291           String instanceName = this.document.getContent().getContent(
292                                                                   ann.getStartNode().getOffset(),
293                                                                   ann.getEndNode().getOffset())
294                                 .toString();
295           Assert.assertNotNull(instanceName);
296 
297           //create instance of proper type only if new
298           if (instanceNames.contains(instanceName)) {
299             continue;
300           }
301 
302           DAMLInstance annInstance = instanceModel.createDAMLInstance(damlClass,instanceName);
303           instanceNames.add(instanceName);
304 
305           //check orhtographic matches
306           List matches = (List)ann.getFeatures().get("matches");
307           if (null != matches) {
308             //try to get equiv instance
309             if (instanceMatches.containsKey(matches)) {
310               DAMLInstance equivInstance = (DAMLInstance)instanceMatches.get(matches);
311 
312               //make sure we don't have duplicated name
313               annInstance.prop_sameIndividualAs().add(equivInstance);
314             }
315             else {
316               //first entry of the coref chain
317               instanceMatches.put(matches,annInstance);
318             }
319           }
320 
321 
322         }//while
323       }//while
324 
325       //print the model into file
326       RDFWriter rdfWriter = new RDFWriterFImpl().getWriter("RDF/XML-ABBREV");
327       rdfWriter.setNsPrefix("gate",this.ontologyLocation.toString()+"#");
328       rdfWriter.write(instanceModel,output,null);
329   }
330 
331   private HashMap ontology2hashmap(DAMLModel ontology) throws Exception {
332 
333     HashMap result = null;
334 
335     //0.
336     Assert.assertNotNull(ontology);
337 
338 
339     result = new HashMap((int)ontology.size()/5);
340 
341     //1.Iterate classes
342     Iterator itClasses = ontology.listDAMLClasses();
343     while (itClasses.hasNext()) {
344       DAMLClass clazz = (DAMLClass)itClasses.next();
345       //Assert.assertNotNull(clazz.getLocalName());
346       if (null != clazz.getLocalName()) {
347         result.put(clazz.getLocalName(),clazz);
348       }
349 
350     }
351 
352     return result;
353   }
354 }