gate.creole
Class APFormatExporter

java.lang.Object
  |
  +--gate.util.AbstractFeatureBearer
        |
        +--gate.creole.AbstractResource
              |
              +--gate.creole.AbstractProcessingResource
                    |
                    +--gate.creole.AbstractLanguageAnalyser
                          |
                          +--gate.creole.APFormatExporter
All Implemented Interfaces:
ANNIEConstants, Executable, FeatureBearer, LanguageAnalyser, NameBearer, ProcessingResource, Resource, Serializable

public class APFormatExporter
extends AbstractLanguageAnalyser
implements ANNIEConstants

This class implements an APF XML exporter. It works on documents or corpora to export them in the APF format.

See Also:
Serialized Form

Inner classes inherited from class gate.creole.AbstractProcessingResource
AbstractProcessingResource.InternalStatusListener, AbstractProcessingResource.IntervalProgressListener
 
Field Summary
static String APF_EXP_DOCUMENT_PARAMETER_NAME
           
static String APF_EXP_DTD_PARAMETER_NAME
           
static String APF_EXP_PATH_PARAMETER_NAME
           
static String APF_EXP_SOURCE_PARAMETER_NAME
           
static String APF_EXP_TYPES_PARAMETER_NAME
           
static String APF_EXP_WRITE_SOURCE_PARAMETER_NAME
           
private static boolean DEBUG
          Debug flag
private  String docId
          This field represents the document ID and is used in generating the entity IDs.
private  String dtdFileName
          This is the name of the dtd file.
private  int entityId
          This field represents a unique entity ID generator
private  List exportedTypes
          This list of strings represents the entity types that will be exported
private  URL exportFilePath
           
private  boolean isSourceWritten
          Flag indicating whether the source attribute is written
private  String source
          The source attribute for source
private  StringBuffer xmlDoc
          This is the xmlDoc that will be created
 
Fields inherited from class gate.creole.AbstractLanguageAnalyser
corpus, document
 
Fields inherited from class gate.creole.AbstractProcessingResource
interrupted, progressListeners, statusListeners
 
Fields inherited from class gate.creole.AbstractResource
name, serialVersionUID
 
Fields inherited from class gate.util.AbstractFeatureBearer
features
 
Fields inherited from interface gate.creole.ANNIEConstants
ANNOTATION_COREF_FEATURE_NAME, DATE_ANNOTATION_TYPE, DOCUMENT_COREF_FEATURE_NAME, LOCATION_ANNOTATION_TYPE, LOOKUP_ANNOTATION_TYPE, LOOKUP_MAJOR_TYPE_FEATURE_NAME, LOOKUP_MINOR_TYPE_FEATURE_NAME, MONEY_ANNOTATION_TYPE, ORGANIZATION_ANNOTATION_TYPE, PERSON_ANNOTATION_TYPE, PERSON_GENDER_FEATURE_NAME, PR_NAMES, SENTENCE_ANNOTATION_TYPE, SPACE_TOKEN_ANNOTATION_TYPE, TOKEN_ANNOTATION_TYPE, TOKEN_CATEGORY_FEATURE_NAME, TOKEN_KIND_FEATURE_NAME, TOKEN_LENGTH_FEATURE_NAME, TOKEN_ORTH_FEATURE_NAME, TOKEN_STRING_FEATURE_NAME
 
Constructor Summary
APFormatExporter()
          Constructor does nothing.
 
Method Summary
 void execute()
          Runs the resource and performs the entire export process
 String getDtdFileName()
          Java bean style accessor for dtdFileName
 List getExportedTypes()
          Java bean style accessor for exportedTypes
 URL getExportFilePath()
          Java bean style accessor for exportFilePath
 Boolean getIsSourceWritten()
          Java bean style accessor for isSourceWritten
private  int getNextEntityId()
          Returns the next safe ID for an entity
 String getSource()
          Java bean style accessor for source
 Resource init()
          Initialise this resource, and returns it.
private  void initDocId()
          Initialises the docId with the document's file name, without the complete path
private  void serializeAnEntity(List anEntity)
          Writes an entity in the xmlDoc conforming to APF standards.
private  void serializeAnEntityAttributes(Annotation ann)
          This method serializes an entity attribute from an Annotation
private  void serializeAnEntityMention(Annotation ann)
          This method serializes an entity mention from an Annotation
protected  void serializeDocumentToAPF()
          Builds the XML document conforming to the APF DTD.
protected  void serializeEntities()
          Transforms all the entities from exportedTypes found in the GATE document into their xml representation
 void setDtdFileName(String aDtdFileName)
          Java bean style mutator for dtdFileName
 void setExportedTypes(List anExportedTypesList)
          Java bean style mutator for exportedTypes
 void setExportFilePath(URL anExportFilePath)
          Java bean style mutator for exportFilePath
 void setIsSourceWritten(Boolean aIsSourceWritten)
          Java bean style mutator for isSourceWritten
 void setSource(String aSource)
          Java bean style mutator for source
 
Methods inherited from class gate.creole.AbstractLanguageAnalyser
getCorpus, getDocument, setCorpus, setDocument
 
Methods inherited from class gate.creole.AbstractProcessingResource
addProgressListener, addStatusListener, cleanup, fireProcessFinished, fireProgressChanged, fireStatusChanged, interrupt, isInterrupted, reInit, removeProgressListener, removeStatusListener
 
Methods inherited from class gate.creole.AbstractResource
checkParameterValues, getName, getParameterValue, getParameterValue, removeResourceListeners, setName, setParameterValue, setParameterValue, setParameterValues, setParameterValues, setResourceListeners
 
Methods inherited from class gate.util.AbstractFeatureBearer
getFeatures, setFeatures
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, registerNatives, toString, wait, wait, wait
 
Methods inherited from interface gate.ProcessingResource
interrupt, isInterrupted, reInit
 
Methods inherited from interface gate.Resource
cleanup, getParameterValue, setParameterValue, setParameterValues
 
Methods inherited from interface gate.util.FeatureBearer
getFeatures, setFeatures
 
Methods inherited from interface gate.util.NameBearer
getName, setName
 

Field Detail

APF_EXP_DOCUMENT_PARAMETER_NAME

public static final String APF_EXP_DOCUMENT_PARAMETER_NAME

APF_EXP_SOURCE_PARAMETER_NAME

public static final String APF_EXP_SOURCE_PARAMETER_NAME

APF_EXP_DTD_PARAMETER_NAME

public static final String APF_EXP_DTD_PARAMETER_NAME

APF_EXP_PATH_PARAMETER_NAME

public static final String APF_EXP_PATH_PARAMETER_NAME

APF_EXP_TYPES_PARAMETER_NAME

public static final String APF_EXP_TYPES_PARAMETER_NAME

APF_EXP_WRITE_SOURCE_PARAMETER_NAME

public static final String APF_EXP_WRITE_SOURCE_PARAMETER_NAME

DEBUG

private static final boolean DEBUG
Debug flag

exportedTypes

private List exportedTypes
This list of strings represents the entity types that will be exported

dtdFileName

private String dtdFileName
This is the name of the DTD file. If it is not present, no DTD will be written to the APF file.

docId

private String docId
This field represents the document ID and is used in generating the entity IDs. It is the file name of the document, without the extension.

entityId

private int entityId
This field represents a unique entity ID generator

xmlDoc

private StringBuffer xmlDoc
This is the xmlDoc that will be created

exportFilePath

private URL exportFilePath

source

private String source
The source attribute for source

isSourceWritten

private boolean isSourceWritten
Flag indicating whether the source attribute is written
Constructor Detail

APFormatExporter

public APFormatExporter()
Constructor does nothing. This PR is initialised in Java bean style, through its setters.
Method Detail

execute

public void execute()
             throws ExecutionException
Runs the resource and performs the entire export process
Overrides:
execute in class AbstractProcessingResource

init

public Resource init()
              throws ResourceInstantiationException
Initialise this resource, and returns it.
Overrides:
init in class AbstractProcessingResource

setExportedTypes

public void setExportedTypes(List anExportedTypesList)
Java bean style mutator for exportedTypes

getExportedTypes

public List getExportedTypes()
Java bean style accessor for exportedTypes

setDtdFileName

public void setDtdFileName(String aDtdFileName)
Java bean style mutator for dtdFileName

getDtdFileName

public String getDtdFileName()
Java bean style accessor for dtdFileName

setExportFilePath

public void setExportFilePath(URL anExportFilePath)
Java bean style mutator for exportFilePath

getExportFilePath

public URL getExportFilePath()
Java bean style accessor for exportFilePath

setSource

public void setSource(String aSource)
Java bean style mutator for source

getSource

public String getSource()
Java bean style accessor for source

getIsSourceWritten

public Boolean getIsSourceWritten()
Java bean style accessor for isSourceWritten

setIsSourceWritten

public void setIsSourceWritten(Boolean aIsSourceWritten)
Java bean style mutator for isSourceWritten

initDocId

private void initDocId()
Initialises the docId with the document's file name, without the complete path

serializeDocumentToAPF

protected void serializeDocumentToAPF()
Builds the XML document conforming to the APF DTD.

serializeEntities

protected void serializeEntities()
Transforms all the entities from exportedTypes found in the GATE document into their xml representation

serializeAnEntity

private void serializeAnEntity(List anEntity)
Writes an entity in the xmlDoc conforming to APF standards.
Parameters:
anEntity - a list of annotations that refer to the same entity. Those annotations were detected and constructed by the orthomatcher.

serializeAnEntityMention

private void serializeAnEntityMention(Annotation ann)
This method serializes an entity mention from an Annotation

serializeAnEntityAttributes

private void serializeAnEntityAttributes(Annotation ann)
This method serializes an entity attribute from an Annotation

getNextEntityId

private int getNextEntityId()
Returns the next safe ID for an entity