1   /*
2    *  TestXml.java
3    *
4    *  Copyright (c) 1998-2001, The University of Sheffield.
5    *
6    *  This file is part of GATE (see http://gate.ac.uk/), and is free
7    *  software, licenced under the GNU Library General Public License,
8    *  Version 2, June 1991 (in the distribution as file licence.html,
9    *  and also available at http://gate.ac.uk/gate/licence.html).
10   *
11   *  Cristian URSU,  8/May/2000
12   *
13   *  $Id: TestXml.java,v 1.53 2002/03/06 17:15:49 kalina Exp $
14   */
15  
16  package gate.xml;
17  
18  import java.util.*;
19  import java.net.*;
20  import java.io.*;
21  import java.beans.*;
22  
23  import gate.util.*;
24  import gate.gui.*;
25  import gate.*;
26  
27  import junit.framework.*;
28  import org.w3c.www.mime.*;
29  
30  
31  /** Test class for XML facilities
32    *
33    */
34  public class TestXml extends TestCase
35  {
36    /** Debug flag */
37    private static final boolean DEBUG = false;
38  
39    /** Construction */
40    public TestXml(String name) { super(name); }
41  
42    /** Fixture set up */
43    public void setUp() {
44    } // setUp
45  
46    public void testGateDocumentToAndFromXmlWithDifferentKindOfFormats()
47                                                                 throws Exception{
48      List urlList = new LinkedList();
49      List urlDescription = new LinkedList();
50      URL url = null;
51  
52      url = Gate.getUrl("tests/xml/xces.xml");
53      assertTrue("Coudn't create a URL object for tests/xml/xces.xml ", url != null);
54      urlList.add(url);
55      urlDescription.add(" an XML document ");
56  
57      url = Gate.getUrl("tests/xml/Sentence.xml");
58      assertTrue("Coudn't create a URL object for tests/xml/Sentence.xml",
59                                                           url != null);
60      urlList.add(url);
61      urlDescription.add(" an XML document ");
62  
63      url = Gate.getUrl("tests/html/test1.htm");
64      assertTrue("Coudn't create a URL object for tests/html/test.htm",url != null);
65      urlList.add(url);
66      urlDescription.add(" an HTML document ");
67  
68      url = Gate.getUrl("tests/rtf/Sample.rtf");
69      assertTrue("Coudn't create a URL object for defg ",url != null);
70      urlList.add(url);
71      urlDescription.add(" a RTF document ");
72  
73  
74      url = Gate.getUrl("tests/email/test2.eml");
75      assertTrue("Coudn't create a URL object for defg ",url != null);
76      urlList.add(url);
77      urlDescription.add(" an EMAIL document ");
78  
79      Iterator iter = urlList.iterator();
80      Iterator descrIter = urlDescription.iterator();
81      while(iter.hasNext()){
82        runCompleteTestWithAFormat((URL) iter.next(),(String)descrIter.next());
83      }// End While
84  
85  
86    }// testGateDocumentToAndFromXmlWithDifferentKindOfFormats
87  
88    private void runCompleteTestWithAFormat(URL url, String urlDescription)
89                                                               throws Exception{
90      // Load the xml Key Document and unpack it
91      gate.Document keyDocument = null;
92  
93      FeatureMap params = Factory.newFeatureMap();
94      params.put(Document.DOCUMENT_URL_PARAMETER_NAME, url);
95      params.put(Document.DOCUMENT_MARKUP_AWARE_PARAMETER_NAME, "false");
96      keyDocument = (Document)Factory.createResource("gate.corpora.DocumentImpl",
97                                                      params);
98  
99      assertTrue("Coudn't create a Gate document instance for " +
100             url.toString() +
101             " Can't continue." , keyDocument != null);
102 
103     gate.DocumentFormat keyDocFormat = null;
104     keyDocFormat = gate.DocumentFormat.getDocumentFormat(
105       keyDocument, keyDocument.getSourceUrl()
106     );
107 
108     assertTrue("Fail to recognize " +
109             url.toString() +
110             " as being " + urlDescription + " !", keyDocFormat != null);
111 
112     // Unpack the markup
113     keyDocFormat.unpackMarkup(keyDocument);
114     // Verfy if all annotations from the default annotation set are consistent
115     gate.corpora.TestDocument.verifyNodeIdConsistency(keyDocument);
116 
117     // Save the size of the document and the number of annotations
118     long keyDocumentSize = keyDocument.getContent().size().longValue();
119     int keyDocumentAnnotationSetSize = keyDocument.getAnnotations().size();
120 
121 
122     // Export the Gate document called keyDocument as  XML, into a temp file,
123     // using UTF-8 encoding
124     File xmlFile = null;
125     xmlFile = Files.writeTempFile(keyDocument.toXml(),"UTF-8");
126     assertTrue("The temp Gate XML file is null. Can't continue.",xmlFile != null);
127 /*
128     // Prepare to write into the xmlFile using UTF-8 encoding
129     OutputStreamWriter writer = new OutputStreamWriter(
130                     new FileOutputStream(xmlFile),"UTF-8");
131     // Write (test the toXml() method)
132     writer.write(keyDocument.toXml());
133     writer.flush();
134     writer.close();
135 */
136     // Load the XML Gate document form the tmp file into memory
137     gate.Document gateDoc = null;
138     gateDoc = gate.Factory.newDocument(xmlFile.toURL());
139 
140     assertTrue("Coudn't create a Gate document instance for " +
141                 xmlFile.toURL().toString() +
142                 " Can't continue." , gateDoc != null);
143 
144     gate.DocumentFormat gateDocFormat = null;
145     gateDocFormat =
146             DocumentFormat.getDocumentFormat(gateDoc,gateDoc.getSourceUrl());
147 
148     assertTrue("Fail to recognize " +
149       xmlFile.toURL().toString() +
150       " as being a Gate XML document !", gateDocFormat != null);
151 
152     gateDocFormat.unpackMarkup(gateDoc);
153     // Verfy if all annotations from the default annotation set are consistent
154     gate.corpora.TestDocument.verifyNodeIdConsistency(gateDoc);
155 
156     // Save the size of the document snd the number of annotations
157     long gateDocSize = keyDocument.getContent().size().longValue();
158     int gateDocAnnotationSetSize = keyDocument.getAnnotations().size();
159 
160     assertTrue("Exporting as Gate XML resulted in document content size lost." +
161       " Something went wrong.", keyDocumentSize == gateDocSize);
162 
163     assertTrue("Exporting as Gate XML resulted in annotation lost." +
164       " No. of annotations missing =  " +
165       Math.abs(keyDocumentAnnotationSetSize - gateDocAnnotationSetSize),
166       keyDocumentAnnotationSetSize == gateDocAnnotationSetSize);
167 
168     //Don't need tmp Gate XML file.
169     xmlFile.delete();
170   }//runCompleteTestWithAFormat
171 
172   /** A test */
173   public void testUnpackMarkup() throws Exception{
174     // create the markupElementsMap map
175     Map markupElementsMap = null;
176     gate.Document doc = null;
177     /*
178     markupElementsMap = new HashMap();
179     // populate it
180     markupElementsMap.put ("S","Sentence");
181     markupElementsMap.put ("s","Sentence");
182     */
183     // Create the element2String map
184     Map anElement2StringMap = null;
185     anElement2StringMap = new HashMap();
186     // Populate it
187     anElement2StringMap.put("S","\n");
188     anElement2StringMap.put("s","\n");
189 
190     doc = gate.Factory.newDocument(Gate.getUrl("tests/xml/xces.xml"));
191  //doc = gate.Factory.newDocument(new URL("file:///z:/gu.xml"));
192 
193     AnnotationSet annotSet = doc.getAnnotations(
194                         GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
195     assertEquals("For "+doc.getSourceUrl()+" the number of annotations"+
196     " should be:758",758,annotSet.size());
197 
198     gate.corpora.TestDocument.verifyNodeIdConsistency(doc);
199   } // testUnpackMarkup()
200 
201   /** Test suite routine for the test runner */
202   public static Test suite() {
203     return new TestSuite(TestXml.class);
204   } // suite
205 
206 } // class TestXml
207