View Javadoc

1   /*
2    * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
3    *
4    * This software is open source.
5    * See the bottom of this file for the licence.
6    */
7   
8   package org.dom4j.io;
9   
10  import java.io.File;
11  import java.io.FileInputStream;
12  import java.io.FileNotFoundException;
13  import java.io.InputStream;
14  import java.io.Reader;
15  import java.io.Serializable;
16  import java.net.URL;
17  
18  import org.dom4j.Document;
19  import org.dom4j.DocumentException;
20  import org.dom4j.DocumentFactory;
21  import org.dom4j.ElementHandler;
22  
23  import org.xml.sax.EntityResolver;
24  import org.xml.sax.ErrorHandler;
25  import org.xml.sax.InputSource;
26  import org.xml.sax.SAXException;
27  import org.xml.sax.SAXParseException;
28  import org.xml.sax.XMLFilter;
29  import org.xml.sax.XMLReader;
30  import org.xml.sax.helpers.DefaultHandler;
31  import org.xml.sax.helpers.XMLReaderFactory;
32  
33  /***
34   * <p>
35   * <code>SAXReader</code> creates a DOM4J tree from SAX parsing events.
36   * </p>
37   * 
38   * <p>
39   * The actual SAX parser that is used by this class is configurable so you can
40   * use your favourite SAX parser if you wish. DOM4J comes configured with its
41   * own SAX parser so you do not need to worry about configuring the SAX parser.
42   * </p>
43   * 
 * <p>
 * To explicitly configure the SAX parser that is used via Java code you can use
 * a constructor or use the {@link #setXMLReader(XMLReader)} or
 * {@link #setXMLReaderClassName(String)} methods.
 * </p>
49   * 
50   * <p>
51   * If the parser is not specified explicitly then the standard SAX policy of
52   * using the <code>org.xml.sax.driver</code> system property is used to
53   * determine the implementation class of {@link XMLReader}.
54   * </p>
55   * 
56   * <p>
57   * If the <code>org.xml.sax.driver</code> system property is not defined then
58   * JAXP is used via reflection (so that DOM4J is not explicitly dependent on the
59   * JAXP classes) to load the JAXP configured SAXParser. If there is any error
60   * creating a JAXP SAXParser an informational message is output and then the
61   * default (Aelfred) SAX parser is used instead.
62   * </p>
63   * 
 * <p>
 * If you are trying to use JAXP to explicitly set your SAX parser and are
 * experiencing problems, you can turn on verbose error reporting by defining
 * the system property <code>org.dom4j.verbose</code> to be "true", which will
 * output a more detailed description of why JAXP could not find a SAX parser.
 * </p>
70   * 
71   * <p>
72   * For more information on JAXP please go to <a
73   * href="http://java.sun.com/xml/">Sun's Java &amp; XML site </a>
74   * </p>
75   * 
76   * @author <a href="mailto:james.strachan@metastuff.com">James Strachan </a>
77   * @version $Revision: 1.58 $
78   */
79  public class SAXReader {
80      private static final String SAX_STRING_INTERNING = 
81              "http://xml.org/sax/features/string-interning";
82      private static final String SAX_NAMESPACE_PREFIXES = 
83              "http://xml.org/sax/features/namespace-prefixes";
84      private static final String SAX_NAMESPACES = 
85              "http://xml.org/sax/features/namespaces";
86      private static final String SAX_DECL_HANDLER = 
87              "http://xml.org/sax/properties/declaration-handler";
88      private static final String SAX_LEXICAL_HANDLER = 
89              "http://xml.org/sax/properties/lexical-handler";
90      private static final String SAX_LEXICALHANDLER = 
91              "http://xml.org/sax/handlers/LexicalHandler";
92  
93      /*** <code>DocumentFactory</code> used to create new document objects */
94      private DocumentFactory factory;
95  
96      /*** <code>XMLReader</code> used to parse the SAX events */
97      private XMLReader xmlReader;
98  
99      /*** Whether validation should occur */
100     private boolean validating;
101 
102     /*** DispatchHandler to call when each <code>Element</code> is encountered */
103     private DispatchHandler dispatchHandler;
104 
105     /*** ErrorHandler class to use */
106     private ErrorHandler errorHandler;
107 
108     /*** The entity resolver */
109     private EntityResolver entityResolver;
110 
111     /*** Should element & attribute names and namespace URIs be interned? */
112     private boolean stringInternEnabled = true;
113 
114     /*** Should internal DTD declarations be expanded into a List in the DTD */
115     private boolean includeInternalDTDDeclarations = false;
116 
117     /*** Should external DTD declarations be expanded into a List in the DTD */
118     private boolean includeExternalDTDDeclarations = false;
119 
120     /*** Whether adjacent text nodes should be merged */
121     private boolean mergeAdjacentText = false;
122 
123     /*** Holds value of property stripWhitespaceText. */
124     private boolean stripWhitespaceText = false;
125 
126     /*** Should we ignore comments */
127     private boolean ignoreComments = false;
128 
129     /*** Encoding of InputSource - null means system default encoding */
130     private String encoding = null;
131 
132     // private boolean includeExternalGeneralEntities = false;
133     // private boolean includeExternalParameterEntities = false;
134 
135     /*** The SAX filter used to filter SAX events */
136     private XMLFilter xmlFilter;
137 
138     public SAXReader() {
139     }
140 
141     public SAXReader(boolean validating) {
142         this.validating = validating;
143     }
144 
145     public SAXReader(DocumentFactory factory) {
146         this.factory = factory;
147     }
148 
149     public SAXReader(DocumentFactory factory, boolean validating) {
150         this.factory = factory;
151         this.validating = validating;
152     }
153 
154     public SAXReader(XMLReader xmlReader) {
155         this.xmlReader = xmlReader;
156     }
157 
158     public SAXReader(XMLReader xmlReader, boolean validating) {
159         this.xmlReader = xmlReader;
160         this.validating = validating;
161     }
162 
163     public SAXReader(String xmlReaderClassName) throws SAXException {
164         if (xmlReaderClassName != null) {
165             this.xmlReader = XMLReaderFactory
166                     .createXMLReader(xmlReaderClassName);
167         }
168     }
169 
170     public SAXReader(String xmlReaderClassName, boolean validating)
171             throws SAXException {
172         if (xmlReaderClassName != null) {
173             this.xmlReader = XMLReaderFactory
174                     .createXMLReader(xmlReaderClassName);
175         }
176 
177         this.validating = validating;
178     }
179 
180     /***
181      * Allows a SAX property to be set on the underlying SAX parser. This can be
182      * useful to set parser-specific properties such as the location of schema
183      * or DTD resources. Though use this method with caution as it has the
184      * possibility of breaking the standard behaviour. An alternative to calling
185      * this method is to correctly configure an XMLReader object instance and
186      * call the {@link #setXMLReader(XMLReader)}method
187      * 
188      * @param name
189      *            is the SAX property name
190      * @param value
191      *            is the value of the SAX property
192      * 
193      * @throws SAXException
194      *             if the XMLReader could not be created or the property could
195      *             not be changed.
196      */
197     public void setProperty(String name, Object value) throws SAXException {
198         getXMLReader().setProperty(name, value);
199     }
200 
201     /***
202      * Sets a SAX feature on the underlying SAX parser. This can be useful to
203      * set parser-specific features. Though use this method with caution as it
204      * has the possibility of breaking the standard behaviour. An alternative to
205      * calling this method is to correctly configure an XMLReader object
206      * instance and call the {@link #setXMLReader(XMLReader)}method
207      * 
208      * @param name
209      *            is the SAX feature name
210      * @param value
211      *            is the value of the SAX feature
212      * 
213      * @throws SAXException
214      *             if the XMLReader could not be created or the feature could
215      *             not be changed.
216      */
217     public void setFeature(String name, boolean value) throws SAXException {
218         getXMLReader().setFeature(name, value);
219     }
220 
221     /***
222      * <p>
223      * Reads a Document from the given <code>File</code>
224      * </p>
225      * 
226      * @param file
227      *            is the <code>File</code> to read from.
228      * 
229      * @return the newly created Document instance
230      * 
231      * @throws DocumentException
232      *             if an error occurs during parsing.
233      */
234     public Document read(File file) throws DocumentException {
235         try {
236             /*
237              * We cannot convert the file to an URL because if the filename
238              * contains '#' characters, there will be problems with the URL in
239              * the InputSource (because a URL like
240              * http://myhost.com/index#anchor is treated the same as
241              * http://myhost.com/index) Thanks to Christian Oetterli
242              */
243             InputSource source = new InputSource(new FileInputStream(file));
244             if (this.encoding != null) {
245                 source.setEncoding(this.encoding);
246             }
247             String path = file.getAbsolutePath();
248 
249             if (path != null) {
250                 // Code taken from Ant FileUtils
251                 StringBuffer sb = new StringBuffer("file://");
252 
253                 // add an extra slash for filesystems with drive-specifiers
254                 if (!path.startsWith(File.separator)) {
255                     sb.append("/");
256                 }
257 
258                 path = path.replace('//', '/');
259                 sb.append(path);
260 
261                 source.setSystemId(sb.toString());
262             }
263 
264             return read(source);
265         } catch (FileNotFoundException e) {
266             throw new DocumentException(e.getMessage(), e);
267         }
268     }
269 
270     /***
271      * <p>
272      * Reads a Document from the given <code>URL</code> using SAX
273      * </p>
274      * 
275      * @param url
276      *            <code>URL</code> to read from.
277      * 
278      * @return the newly created Document instance
279      * 
280      * @throws DocumentException
281      *             if an error occurs during parsing.
282      */
283     public Document read(URL url) throws DocumentException {
284         String systemID = url.toExternalForm();
285 
286         InputSource source = new InputSource(systemID);
287         if (this.encoding != null) {
288             source.setEncoding(this.encoding);
289         }
290 
291         return read(source);
292     }
293 
294     /***
295      * <p>
296      * Reads a Document from the given URL or filename using SAX.
297      * </p>
298      * 
299      * <p>
300      * If the systemId contains a <code>':'</code> character then it is
301      * assumed to be a URL otherwise its assumed to be a file name. If you want
302      * finer grained control over this mechansim then please explicitly pass in
303      * either a {@link URL}or a {@link File}instance instead of a {@link
304      * String} to denote the source of the document.
305      * </p>
306      * 
307      * @param systemId
308      *            is a URL for a document or a file name.
309      * 
310      * @return the newly created Document instance
311      * 
312      * @throws DocumentException
313      *             if an error occurs during parsing.
314      */
315     public Document read(String systemId) throws DocumentException {
316         InputSource source = new InputSource(systemId);
317         if (this.encoding != null) {
318             source.setEncoding(this.encoding);
319         }
320 
321         return read(source);
322     }
323 
324     /***
325      * <p>
326      * Reads a Document from the given stream using SAX
327      * </p>
328      * 
329      * @param in
330      *            <code>InputStream</code> to read from.
331      * 
332      * @return the newly created Document instance
333      * 
334      * @throws DocumentException
335      *             if an error occurs during parsing.
336      */
337     public Document read(InputStream in) throws DocumentException {
338         InputSource source = new InputSource(in);
339         if (this.encoding != null) {
340             source.setEncoding(this.encoding);
341         }
342 
343         return read(source);
344     }
345 
346     /***
347      * <p>
348      * Reads a Document from the given <code>Reader</code> using SAX
349      * </p>
350      * 
351      * @param reader
352      *            is the reader for the input
353      * 
354      * @return the newly created Document instance
355      * 
356      * @throws DocumentException
357      *             if an error occurs during parsing.
358      */
359     public Document read(Reader reader) throws DocumentException {
360         InputSource source = new InputSource(reader);
361         if (this.encoding != null) {
362             source.setEncoding(this.encoding);
363         }
364 
365         return read(source);
366     }
367 
368     /***
369      * <p>
370      * Reads a Document from the given stream using SAX
371      * </p>
372      * 
373      * @param in
374      *            <code>InputStream</code> to read from.
375      * @param systemId
376      *            is the URI for the input
377      * 
378      * @return the newly created Document instance
379      * 
380      * @throws DocumentException
381      *             if an error occurs during parsing.
382      */
383     public Document read(InputStream in, String systemId)
384             throws DocumentException {
385         InputSource source = new InputSource(in);
386         source.setSystemId(systemId);
387         if (this.encoding != null) {
388             source.setEncoding(this.encoding);
389         }
390 
391         return read(source);
392     }
393 
394     /***
395      * <p>
396      * Reads a Document from the given <code>Reader</code> using SAX
397      * </p>
398      * 
399      * @param reader
400      *            is the reader for the input
401      * @param systemId
402      *            is the URI for the input
403      * 
404      * @return the newly created Document instance
405      * 
406      * @throws DocumentException
407      *             if an error occurs during parsing.
408      */
409     public Document read(Reader reader, String systemId)
410             throws DocumentException {
411         InputSource source = new InputSource(reader);
412         source.setSystemId(systemId);
413         if (this.encoding != null) {
414             source.setEncoding(this.encoding);
415         }
416 
417         return read(source);
418     }
419 
420     /***
421      * <p>
422      * Reads a Document from the given <code>InputSource</code> using SAX
423      * </p>
424      * 
425      * @param in
426      *            <code>InputSource</code> to read from.
427      * 
428      * @return the newly created Document instance
429      * 
430      * @throws DocumentException
431      *             if an error occurs during parsing.
432      */
433     public Document read(InputSource in) throws DocumentException {
434         try {
435             XMLReader reader = getXMLReader();
436 
437             reader = installXMLFilter(reader);
438 
439             EntityResolver thatEntityResolver = this.entityResolver;
440 
441             if (thatEntityResolver == null) {
442                 thatEntityResolver = createDefaultEntityResolver(in
443                         .getSystemId());
444                 this.entityResolver = thatEntityResolver;
445             }
446 
447             reader.setEntityResolver(thatEntityResolver);
448 
449             SAXContentHandler contentHandler = createContentHandler(reader);
450             contentHandler.setEntityResolver(thatEntityResolver);
451             contentHandler.setInputSource(in);
452 
453             boolean internal = isIncludeInternalDTDDeclarations();
454             boolean external = isIncludeExternalDTDDeclarations();
455 
456             contentHandler.setIncludeInternalDTDDeclarations(internal);
457             contentHandler.setIncludeExternalDTDDeclarations(external);
458             contentHandler.setMergeAdjacentText(isMergeAdjacentText());
459             contentHandler.setStripWhitespaceText(isStripWhitespaceText());
460             contentHandler.setIgnoreComments(isIgnoreComments());
461             reader.setContentHandler(contentHandler);
462 
463             configureReader(reader, contentHandler);
464 
465             reader.parse(in);
466 
467             return contentHandler.getDocument();
468         } catch (Exception e) {
469             if (e instanceof SAXParseException) {
470                 // e.printStackTrace();
471                 SAXParseException parseException = (SAXParseException) e;
472                 String systemId = parseException.getSystemId();
473 
474                 if (systemId == null) {
475                     systemId = "";
476                 }
477 
478                 String message = "Error on line "
479                         + parseException.getLineNumber() + " of document "
480                         + systemId + " : " + parseException.getMessage();
481 
482                 throw new DocumentException(message, e);
483             } else {
484                 throw new DocumentException(e.getMessage(), e);
485             }
486         }
487     }
488 
489     // Properties
490     // -------------------------------------------------------------------------
491 
492     /***
493      * DOCUMENT ME!
494      * 
495      * @return the validation mode, true if validating will be done otherwise
496      *         false.
497      */
498     public boolean isValidating() {
499         return validating;
500     }
501 
502     /***
503      * Sets the validation mode.
504      * 
505      * @param validation
506      *            indicates whether or not validation should occur.
507      */
508     public void setValidation(boolean validation) {
509         this.validating = validation;
510     }
511 
512     /***
513      * DOCUMENT ME!
514      * 
515      * @return whether internal DTD declarations should be expanded into the
516      *         DocumentType object or not.
517      */
518     public boolean isIncludeInternalDTDDeclarations() {
519         return includeInternalDTDDeclarations;
520     }
521 
522     /***
523      * Sets whether internal DTD declarations should be expanded into the
524      * DocumentType object or not.
525      * 
526      * @param include
527      *            whether or not DTD declarations should be expanded and
528      *            included into the DocumentType object.
529      */
530     public void setIncludeInternalDTDDeclarations(boolean include) {
531         this.includeInternalDTDDeclarations = include;
532     }
533 
534     /***
535      * DOCUMENT ME!
536      * 
537      * @return whether external DTD declarations should be expanded into the
538      *         DocumentType object or not.
539      */
540     public boolean isIncludeExternalDTDDeclarations() {
541         return includeExternalDTDDeclarations;
542     }
543 
544     /***
545      * Sets whether DTD external declarations should be expanded into the
546      * DocumentType object or not.
547      * 
548      * @param include
549      *            whether or not DTD declarations should be expanded and
550      *            included into the DocumentType object.
551      */
552     public void setIncludeExternalDTDDeclarations(boolean include) {
553         this.includeExternalDTDDeclarations = include;
554     }
555 
556     /***
557      * Sets whether String interning is enabled or disabled for element &
558      * attribute names and namespace URIs. This proprety is enabled by default.
559      * 
560      * @return DOCUMENT ME!
561      */
562     public boolean isStringInternEnabled() {
563         return stringInternEnabled;
564     }
565 
566     /***
567      * Sets whether String interning is enabled or disabled for element &
568      * attribute names and namespace URIs
569      * 
570      * @param stringInternEnabled
571      *            DOCUMENT ME!
572      */
573     public void setStringInternEnabled(boolean stringInternEnabled) {
574         this.stringInternEnabled = stringInternEnabled;
575     }
576 
577     /***
578      * Returns whether adjacent text nodes should be merged together.
579      * 
580      * @return Value of property mergeAdjacentText.
581      */
582     public boolean isMergeAdjacentText() {
583         return mergeAdjacentText;
584     }
585 
586     /***
587      * Sets whether or not adjacent text nodes should be merged together when
588      * parsing.
589      * 
590      * @param mergeAdjacentText
591      *            New value of property mergeAdjacentText.
592      */
593     public void setMergeAdjacentText(boolean mergeAdjacentText) {
594         this.mergeAdjacentText = mergeAdjacentText;
595     }
596 
597     /***
598      * Sets whether whitespace between element start and end tags should be
599      * ignored
600      * 
601      * @return Value of property stripWhitespaceText.
602      */
603     public boolean isStripWhitespaceText() {
604         return stripWhitespaceText;
605     }
606 
607     /***
608      * Sets whether whitespace between element start and end tags should be
609      * ignored.
610      * 
611      * @param stripWhitespaceText
612      *            New value of property stripWhitespaceText.
613      */
614     public void setStripWhitespaceText(boolean stripWhitespaceText) {
615         this.stripWhitespaceText = stripWhitespaceText;
616     }
617 
618     /***
619      * Returns whether we should ignore comments or not.
620      * 
621      * @return boolean
622      */
623     public boolean isIgnoreComments() {
624         return ignoreComments;
625     }
626 
627     /***
628      * Sets whether we should ignore comments or not.
629      * 
630      * @param ignoreComments
631      *            whether we should ignore comments or not.
632      */
633     public void setIgnoreComments(boolean ignoreComments) {
634         this.ignoreComments = ignoreComments;
635     }
636 
637     /***
638      * DOCUMENT ME!
639      * 
640      * @return the <code>DocumentFactory</code> used to create document
641      *         objects
642      */
643     public DocumentFactory getDocumentFactory() {
644         if (factory == null) {
645             factory = DocumentFactory.getInstance();
646         }
647 
648         return factory;
649     }
650 
651     /***
652      * <p>
653      * This sets the <code>DocumentFactory</code> used to create new
654      * documents. This method allows the building of custom DOM4J tree objects
655      * to be implemented easily using a custom derivation of
656      * {@link DocumentFactory}
657      * </p>
658      * 
659      * @param documentFactory
660      *            <code>DocumentFactory</code> used to create DOM4J objects
661      */
662     public void setDocumentFactory(DocumentFactory documentFactory) {
663         this.factory = documentFactory;
664     }
665 
666     /***
667      * DOCUMENT ME!
668      * 
669      * @return the <code>ErrorHandler</code> used by SAX
670      */
671     public ErrorHandler getErrorHandler() {
672         return errorHandler;
673     }
674 
675     /***
676      * Sets the <code>ErrorHandler</code> used by the SAX
677      * <code>XMLReader</code>.
678      * 
679      * @param errorHandler
680      *            is the <code>ErrorHandler</code> used by SAX
681      */
682     public void setErrorHandler(ErrorHandler errorHandler) {
683         this.errorHandler = errorHandler;
684     }
685 
686     /***
687      * Returns the current entity resolver used to resolve entities
688      * 
689      * @return DOCUMENT ME!
690      */
691     public EntityResolver getEntityResolver() {
692         return entityResolver;
693     }
694 
695     /***
696      * Sets the entity resolver used to resolve entities.
697      * 
698      * @param entityResolver
699      *            DOCUMENT ME!
700      */
701     public void setEntityResolver(EntityResolver entityResolver) {
702         this.entityResolver = entityResolver;
703     }
704 
705     /***
706      * DOCUMENT ME!
707      * 
708      * @return the <code>XMLReader</code> used to parse SAX events
709      * 
710      * @throws SAXException
711      *             DOCUMENT ME!
712      */
713     public XMLReader getXMLReader() throws SAXException {
714         if (xmlReader == null) {
715             xmlReader = createXMLReader();
716         }
717 
718         return xmlReader;
719     }
720 
721     /***
722      * Sets the <code>XMLReader</code> used to parse SAX events
723      * 
724      * @param reader
725      *            is the <code>XMLReader</code> to parse SAX events
726      */
727     public void setXMLReader(XMLReader reader) {
728         this.xmlReader = reader;
729     }
730 
731     /***
732      * Returns encoding used for InputSource (null means system default
733      * encoding)
734      * 
735      * @return encoding used for InputSource
736      * 
737      */
738     public String getEncoding() {
739         return encoding;
740     }
741 
742     /***
743      * Sets encoding used for InputSource (null means system default encoding)
744      * 
745      * @param encoding
746      *            is encoding used for InputSource
747      */
748     public void setEncoding(String encoding) {
749         this.encoding = encoding;
750     }
751 
752     /***
753      * Sets the class name of the <code>XMLReader</code> to be used to parse
754      * SAX events.
755      * 
756      * @param xmlReaderClassName
757      *            is the class name of the <code>XMLReader</code> to parse SAX
758      *            events
759      * 
760      * @throws SAXException
761      *             DOCUMENT ME!
762      */
763     public void setXMLReaderClassName(String xmlReaderClassName)
764             throws SAXException {
765         setXMLReader(XMLReaderFactory.createXMLReader(xmlReaderClassName));
766     }
767 
768     /***
769      * Adds the <code>ElementHandler</code> to be called when the specified
770      * path is encounted.
771      * 
772      * @param path
773      *            is the path to be handled
774      * @param handler
775      *            is the <code>ElementHandler</code> to be called by the event
776      *            based processor.
777      */
778     public void addHandler(String path, ElementHandler handler) {
779         getDispatchHandler().addHandler(path, handler);
780     }
781 
782     /***
783      * Removes the <code>ElementHandler</code> from the event based processor,
784      * for the specified path.
785      * 
786      * @param path
787      *            is the path to remove the <code>ElementHandler</code> for.
788      */
789     public void removeHandler(String path) {
790         getDispatchHandler().removeHandler(path);
791     }
792 
793     /***
794      * When multiple <code>ElementHandler</code> instances have been
795      * registered, this will set a default <code>ElementHandler</code> to be
796      * called for any path which does <b>NOT </b> have a handler registered.
797      * 
798      * @param handler
799      *            is the <code>ElementHandler</code> to be called by the event
800      *            based processor.
801      */
802     public void setDefaultHandler(ElementHandler handler) {
803         getDispatchHandler().setDefaultHandler(handler);
804     }
805 
806     /***
807      * This method clears out all the existing handlers and default handler
808      * setting things back as if no handler existed. Useful when reusing an
809      * object instance.
810      */
811     public void resetHandlers() {
812         getDispatchHandler().resetHandlers();
813     }
814 
815     /***
816      * Returns the SAX filter being used to filter SAX events.
817      * 
818      * @return the SAX filter being used or null if no SAX filter is installed
819      */
820     public XMLFilter getXMLFilter() {
821         return xmlFilter;
822     }
823 
824     /***
825      * Sets the SAX filter to be used when filtering SAX events
826      * 
827      * @param filter
828      *            is the SAX filter to use or null to disable filtering
829      */
830     public void setXMLFilter(XMLFilter filter) {
831         this.xmlFilter = filter;
832     }
833 
834     // Implementation methods
835     // -------------------------------------------------------------------------
836 
837     /***
838      * Installs any XMLFilter objects required to allow the SAX event stream to
839      * be filtered and preprocessed before it gets to dom4j.
840      * 
841      * @param reader
842      *            DOCUMENT ME!
843      * 
844      * @return the new XMLFilter if applicable or the original XMLReader if no
845      *         filter is being used.
846      */
847     protected XMLReader installXMLFilter(XMLReader reader) {
848         XMLFilter filter = getXMLFilter();
849 
850         if (filter != null) {
851             // find the root XMLFilter
852             XMLFilter root = filter;
853 
854             while (true) {
855                 XMLReader parent = root.getParent();
856 
857                 if (parent instanceof XMLFilter) {
858                     root = (XMLFilter) parent;
859                 } else {
860                     break;
861                 }
862             }
863 
864             root.setParent(reader);
865 
866             return filter;
867         }
868 
869         return reader;
870     }
871 
872     protected DispatchHandler getDispatchHandler() {
873         if (dispatchHandler == null) {
874             dispatchHandler = new DispatchHandler();
875         }
876 
877         return dispatchHandler;
878     }
879 
880     protected void setDispatchHandler(DispatchHandler dispatchHandler) {
881         this.dispatchHandler = dispatchHandler;
882     }
883 
884     /***
885      * Factory Method to allow alternate methods of creating and configuring
886      * XMLReader objects
887      * 
888      * @return DOCUMENT ME!
889      * 
890      * @throws SAXException
891      *             DOCUMENT ME!
892      */
893     protected XMLReader createXMLReader() throws SAXException {
894         return SAXHelper.createXMLReader(isValidating());
895     }
896 
897     /***
898      * Configures the XMLReader before use
899      * 
900      * @param reader
901      *            DOCUMENT ME!
902      * @param handler
903      *            DOCUMENT ME!
904      * 
905      * @throws DocumentException
906      *             DOCUMENT ME!
907      */
908     protected void configureReader(XMLReader reader, DefaultHandler handler)
909             throws DocumentException {
910         // configure lexical handling
911         SAXHelper.setParserProperty(reader, SAX_LEXICALHANDLER, handler);
912 
913         // try alternate property just in case
914         SAXHelper.setParserProperty(reader, SAX_LEXICAL_HANDLER, handler);
915 
916         // register the DeclHandler
917         if (includeInternalDTDDeclarations || includeExternalDTDDeclarations) {
918             SAXHelper.setParserProperty(reader, SAX_DECL_HANDLER, handler);
919         }
920 
921         // configure namespace support
922         SAXHelper.setParserFeature(reader, SAX_NAMESPACES, true);
923 
924         SAXHelper.setParserFeature(reader, SAX_NAMESPACE_PREFIXES, false);
925 
926         // string interning
927         SAXHelper.setParserFeature(reader, SAX_STRING_INTERNING,
928                 isStringInternEnabled());
929 
930         // external entites
931         /*
932          * SAXHelper.setParserFeature( reader,
933          * "http://xml.org/sax/properties/external-general-entities",
934          * includeExternalGeneralEntities ); SAXHelper.setParserFeature( reader,
935          * "http://xml.org/sax/properties/external-parameter-entities",
936          * includeExternalParameterEntities );
937          */
938         // use Locator2 if possible
939         SAXHelper.setParserFeature(reader,
940                 "http://xml.org/sax/features/use-locator2", true);
941 
942         try {
943             // configure validation support
944             reader.setFeature("http://xml.org/sax/features/validation",
945                     isValidating());
946 
947             if (errorHandler != null) {
948                 reader.setErrorHandler(errorHandler);
949             } else {
950                 reader.setErrorHandler(handler);
951             }
952         } catch (Exception e) {
953             if (isValidating()) {
954                 throw new DocumentException("Validation not supported for"
955                         + " XMLReader: " + reader, e);
956             }
957         }
958     }
959 
960     /***
961      * Factory Method to allow user derived SAXContentHandler objects to be used
962      * 
963      * @param reader
964      *            DOCUMENT ME!
965      * 
966      * @return DOCUMENT ME!
967      */
968     protected SAXContentHandler createContentHandler(XMLReader reader) {
969         return new SAXContentHandler(getDocumentFactory(), dispatchHandler);
970     }
971 
972     protected EntityResolver createDefaultEntityResolver(String systemId) {
973         String prefix = null;
974 
975         if ((systemId != null) && (systemId.length() > 0)) {
976             int idx = systemId.lastIndexOf('/');
977 
978             if (idx > 0) {
979                 prefix = systemId.substring(0, idx + 1);
980             }
981         }
982 
983         return new SAXEntityResolver(prefix);
984     }
985 
986     protected static class SAXEntityResolver implements EntityResolver,
987             Serializable {
988         protected String uriPrefix;
989 
990         public SAXEntityResolver(String uriPrefix) {
991             this.uriPrefix = uriPrefix;
992         }
993 
994         public InputSource resolveEntity(String publicId, String systemId) {
995             // try create a relative URI reader...
996             if ((systemId != null) && (systemId.length() > 0)) {
997                 if ((uriPrefix != null) && (systemId.indexOf(':') <= 0)) {
998                     systemId = uriPrefix + systemId;
999                 }
1000             }
1001 
1002             return new InputSource(systemId);
1003         }
1004     }
1005 }
1006 
1007 /*
1008  * Redistribution and use of this software and associated documentation
1009  * ("Software"), with or without modification, are permitted provided that the
1010  * following conditions are met:
1011  * 
1012  * 1. Redistributions of source code must retain copyright statements and
1013  * notices. Redistributions must also contain a copy of this document.
1014  * 
1015  * 2. Redistributions in binary form must reproduce the above copyright notice,
1016  * this list of conditions and the following disclaimer in the documentation
1017  * and/or other materials provided with the distribution.
1018  * 
1019  * 3. The name "DOM4J" must not be used to endorse or promote products derived
1020  * from this Software without prior written permission of MetaStuff, Ltd. For
1021  * written permission, please contact dom4j-info@metastuff.com.
1022  * 
1023  * 4. Products derived from this Software may not be called "DOM4J" nor may
1024  * "DOM4J" appear in their names without prior written permission of MetaStuff,
1025  * Ltd. DOM4J is a registered trademark of MetaStuff, Ltd.
1026  * 
1027  * 5. Due credit should be given to the DOM4J Project - http://www.dom4j.org
1028  * 
1029  * THIS SOFTWARE IS PROVIDED BY METASTUFF, LTD. AND CONTRIBUTORS ``AS IS'' AND
1030  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1031  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1032  * ARE DISCLAIMED. IN NO EVENT SHALL METASTUFF, LTD. OR ITS CONTRIBUTORS BE
1033  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
1034  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
1035  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
1036  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
1037  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
1038  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
1039  * POSSIBILITY OF SUCH DAMAGE.
1040  * 
1041  * Copyright 2001-2005 (C) MetaStuff, Ltd. All Rights Reserved.
1042  */