|
XmlPositionCorrectionHandler |
|
1 /* 2 * XmlPositionCorrectionHandler.java 3 * 4 * Copyright (c) 1998-2001, The University of Sheffield. 5 * 6 * This file is part of GATE (see http://gate.ac.uk/), and is free 7 * software, licenced under the GNU Library General Public License, 8 * Version 2, June 1991 (in the distribution as file licence.html, 9 * and also available at http://gate.ac.uk/gate/licence.html). 10 * 11 * Angel Kirilov, 4 January 2002 12 * 13 * $Id: XmlPositionCorrectionHandler.java,v 1.2 2002/01/24 13:04:14 nasso Exp $ 14 */ 15 16 package gate.xml; 17 18 import org.xml.sax.helpers.*; 19 20 21 /** 22 * This class corrects a Xerces parser bug in the file position reported during 23 * the parsing process. The Xerces parser cuts the processed file into 16K pieces. If 24 * the parser crosses a 16K border, the position reported in characters() is 25 * zero. 26 * 27 * This bug can be worked around by extending this content handler instead of 28 * org.xml.sax.helpers.DefaultHandler. 29 * 30 * The real content handler should call the methods startDocument() and characters() 31 * in order to compute the correct position in the file. The corrected position can be 32 * obtained through the protected data member m_realOffset or with getRealOffset(). 
/**
 * Content handler base class that compensates for offsets reported by
 * {@code characters()} which wrap around instead of growing monotonically.
 *
 * According to the original author's notes, the Xerces parser processes a
 * file in 16K pieces, so the offset it reports falls back towards zero each
 * time a 16K border is crossed. This class counts those wraps and exposes a
 * corrected, ever-growing offset.
 *
 * Subclasses should extend this class instead of
 * {@code org.xml.sax.helpers.DefaultHandler} and invoke
 * {@code super.startDocument()} and {@code super.characters(...)} from their
 * own overrides. The corrected offset is then available through
 * {@link #getRealOffset()} or the protected field {@code m_realOffset}.
 */
public class XmlPositionCorrectionHandler extends DefaultHandler {

  /** Distance (16K) after which the reported offset wraps around. */
  private static final int WRAP_SIZE = 0x4000;

  /**
   * A backwards jump in the reported offset larger than this (8K) is taken
   * as evidence that a wrap occurred. Heuristic: catches most cases only.
   */
  private static final int WRAP_THRESHOLD = 0x2000;

  /** Corrected offset computed by the most recent counted characters() call. */
  protected long m_realOffset;

  /** Raw offset seen in the previous counted characters() call. */
  private int m_lastPosition;

  /** Number of wraps detected since the document started. */
  private int m_multiplyer;

  /** Creates a handler with all position tracking state set to zero. */
  public XmlPositionCorrectionHandler() {
    resetPosition();
  } // XmlPositionCorrectionHandler

  /**
   * Resets the position tracking state at the start of a document.
   *
   * @throws org.xml.sax.SAXException declared for compatibility with the
   *         overridden method; never thrown by this implementation
   */
  public void startDocument() throws org.xml.sax.SAXException {
    resetPosition();
  } // startDocument

  /**
   * Returns the corrected offset computed by the last counted
   * {@code characters()} call.
   *
   * @return the corrected offset, or 0 if nothing has been counted yet
   */
  public long getRealOffset() {
    return m_realOffset;
  } // getRealOffset

  /**
   * Updates the corrected offset from the raw offset reported by the parser.
   *
   * @param text   the character buffer handed over by the parser
   * @param offset the raw start position reported by the parser
   * @param len    the number of characters reported
   * @throws org.xml.sax.SAXException declared for compatibility with the
   *         overridden method; never thrown by this implementation
   */
  public void characters(char[] text, int offset, int len)
      throws org.xml.sax.SAXException {
    // A single character delivered in a tiny buffer of its own is treated as
    // a unicode char or &xxx; entity; it carries no usable position.
    if (offset == 0 && len == 1 && text.length <= 2) {
      return;
    }

    // The raw offset jumped far backwards: assume a wrap border was crossed.
    if (m_lastPosition - offset > WRAP_THRESHOLD) {
      m_multiplyer++;
    }
    m_lastPosition = offset;

    // Widen before multiplying; an int product overflows once the input
    // exceeds 2 GiB (131072 wraps) and would yield a negative offset.
    m_realOffset = (long) m_multiplyer * WRAP_SIZE + offset;
  } // characters

  /** Sets all position tracking state back to zero. */
  private void resetPosition() {
    m_realOffset = 0;
    m_lastPosition = 0;
    m_multiplyer = 0;
  } // resetPosition

} // XmlPositionCorrectionHandler
|
XmlPositionCorrectionHandler |
|