View Javadoc
1   /*
2    * Copyright 2004-2025 the original author or authors.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *    https://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package com.ibatis.common.xml;
17  
18  import java.io.IOException;
19  import java.io.InputStream;
20  import java.io.Reader;
21  import java.util.ArrayList;
22  import java.util.HashMap;
23  import java.util.List;
24  import java.util.Map;
25  import java.util.StringTokenizer;
26  
27  import javax.xml.XMLConstants;
28  import javax.xml.parsers.DocumentBuilder;
29  import javax.xml.parsers.DocumentBuilderFactory;
30  import javax.xml.parsers.FactoryConfigurationError;
31  import javax.xml.parsers.ParserConfigurationException;
32  
33  import org.w3c.dom.Document;
34  import org.w3c.dom.Element;
35  import org.w3c.dom.NamedNodeMap;
36  import org.w3c.dom.Node;
37  import org.w3c.dom.NodeList;
38  import org.w3c.dom.Text;
39  import org.xml.sax.EntityResolver;
40  import org.xml.sax.ErrorHandler;
41  import org.xml.sax.InputSource;
42  import org.xml.sax.SAXException;
43  import org.xml.sax.SAXParseException;
44  
45  /**
46   * The NodeletParser is a callback based parser similar to SAX. The big difference is that rather than having a single
47   * callback for all nodes, the NodeletParser has a number of callbacks mapped to various nodes. The callback is called a
48   * Nodelet and it is registered with the NodeletParser against a specific XPath.
49   */
50  public class NodeletParser {
51  
52    /** The let map. */
53    private Map letMap = new HashMap<>();
54  
55    /** The validation. */
56    private boolean validation;
57  
58    /** The entity resolver. */
59    private EntityResolver entityResolver;
60  
61    /**
62     * Registers a nodelet for the specified XPath. Current XPaths supported are:
63     * <ul>
64     * <li>Text Path - /rootElement/childElement/text()
65     * <li>Attribute Path - /rootElement/childElement/@theAttribute
66     * <li>Element Path - /rootElement/childElement/theElement
67     * <li>All Elements Named - //theElement
68     * </ul>
69     *
70     * @param xpath
71     *          the xpath
72     * @param nodelet
73     *          the nodelet
74     */
75    public void addNodelet(String xpath, Nodelet nodelet) {
76      letMap.put(xpath, nodelet);
77    }
78  
79    /**
80     * Begins parsing from the provided Reader.
81     *
82     * @param reader
83     *          the reader
84     *
85     * @throws NodeletException
86     *           the nodelet exception
87     */
88    public void parse(Reader reader) throws NodeletException {
89      try {
90        Document doc = createDocument(reader);
91        parse(doc.getLastChild());
92      } catch (Exception e) {
93        throw new NodeletException("Error parsing XML.  Cause: " + e, e);
94      }
95    }
96  
97    /**
98     * Parses the.
99     *
100    * @param inputStream
101    *          the input stream
102    *
103    * @throws NodeletException
104    *           the nodelet exception
105    */
106   public void parse(InputStream inputStream) throws NodeletException {
107     try {
108       Document doc = createDocument(inputStream);
109       parse(doc.getLastChild());
110     } catch (Exception e) {
111       throw new NodeletException("Error parsing XML.  Cause: " + e, e);
112     }
113   }
114 
115   /**
116    * Begins parsing from the provided Node.
117    *
118    * @param node
119    *          the node
120    */
121   public void parse(Node node) {
122     Path path = new Path();
123     processNodelet(node, "/");
124     process(node, path);
125   }
126 
127   /**
128    * A recursive method that walkes the DOM tree, registers XPaths and calls Nodelets registered under those XPaths.
129    *
130    * @param node
131    *          the node
132    * @param path
133    *          the path
134    */
135   private void process(Node node, Path path) {
136     if (node instanceof Element) {
137       // Element
138       String elementName = node.getNodeName();
139       path.add(elementName);
140       processNodelet(node, path.toString());
141       processNodelet(node, new StringBuilder("//").append(elementName).toString());
142 
143       // Attribute
144       NamedNodeMap attributes = node.getAttributes();
145       int n = attributes.getLength();
146       for (int i = 0; i < n; i++) {
147         Node att = attributes.item(i);
148         String attrName = att.getNodeName();
149         path.add("@" + attrName);
150         processNodelet(att, path.toString());
151         processNodelet(node, new StringBuilder("//@").append(attrName).toString());
152         path.remove();
153       }
154 
155       // Children
156       NodeList children = node.getChildNodes();
157       for (int i = 0; i < children.getLength(); i++) {
158         process(children.item(i), path);
159       }
160       path.add("end()");
161       processNodelet(node, path.toString());
162       path.remove();
163       path.remove();
164     } else if (node instanceof Text) {
165       // Text
166       path.add("text()");
167       processNodelet(node, path.toString());
168       processNodelet(node, "//text()");
169       path.remove();
170     }
171   }
172 
173   /**
174    * Process nodelet.
175    *
176    * @param node
177    *          the node
178    * @param pathString
179    *          the path string
180    */
181   private void processNodelet(Node node, String pathString) {
182     Nodelet nodelet = (Nodelet) letMap.get(pathString);
183     if (nodelet != null) {
184       try {
185         nodelet.process(node);
186       } catch (Exception e) {
187         throw new RuntimeException("Error parsing XPath '" + pathString + "'.  Cause: " + e, e);
188       }
189     }
190   }
191 
192   /**
193    * Creates a JAXP Document from a reader.
194    *
195    * @param reader
196    *          the reader
197    *
198    * @return the document
199    *
200    * @throws ParserConfigurationException
201    *           the parser configuration exception
202    * @throws FactoryConfigurationError
203    *           the factory configuration error
204    * @throws SAXException
205    *           the SAX exception
206    * @throws IOException
207    *           Signals that an I/O exception has occurred.
208    */
209   private Document createDocument(Reader reader)
210       throws ParserConfigurationException, FactoryConfigurationError, SAXException, IOException {
211     DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
212     factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
213     factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
214     factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
215     factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
216     factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
217     factory.setValidating(validation);
218 
219     factory.setNamespaceAware(false);
220     factory.setIgnoringComments(true);
221     factory.setIgnoringElementContentWhitespace(false);
222     factory.setCoalescing(false);
223     factory.setExpandEntityReferences(false);
224 
225     DocumentBuilder builder = factory.newDocumentBuilder();
226     builder.setEntityResolver(entityResolver);
227     builder.setErrorHandler(new ErrorHandler() {
228       @Override
229       public void error(SAXParseException exception) throws SAXException {
230         throw exception;
231       }
232 
233       @Override
234       public void fatalError(SAXParseException exception) throws SAXException {
235         throw exception;
236       }
237 
238       @Override
239       public void warning(SAXParseException exception) throws SAXException {
240       }
241     });
242 
243     return builder.parse(new InputSource(reader));
244   }
245 
246   /**
247    * Creates a JAXP Document from an InoutStream.
248    *
249    * @param inputStream
250    *          the input stream
251    *
252    * @return the document
253    *
254    * @throws ParserConfigurationException
255    *           the parser configuration exception
256    * @throws FactoryConfigurationError
257    *           the factory configuration error
258    * @throws SAXException
259    *           the SAX exception
260    * @throws IOException
261    *           Signals that an I/O exception has occurred.
262    */
263   private Document createDocument(InputStream inputStream)
264       throws ParserConfigurationException, FactoryConfigurationError, SAXException, IOException {
265     DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
266     factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
267     factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
268     factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
269     factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
270     factory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
271     factory.setValidating(validation);
272 
273     factory.setNamespaceAware(false);
274     factory.setIgnoringComments(true);
275     factory.setIgnoringElementContentWhitespace(false);
276     factory.setCoalescing(false);
277     factory.setExpandEntityReferences(false);
278 
279     DocumentBuilder builder = factory.newDocumentBuilder();
280     builder.setEntityResolver(entityResolver);
281     builder.setErrorHandler(new ErrorHandler() {
282       @Override
283       public void error(SAXParseException exception) throws SAXException {
284         throw exception;
285       }
286 
287       @Override
288       public void fatalError(SAXParseException exception) throws SAXException {
289         throw exception;
290       }
291 
292       @Override
293       public void warning(SAXParseException exception) throws SAXException {
294       }
295     });
296 
297     return builder.parse(new InputSource(inputStream));
298   }
299 
300   /**
301    * Sets the validation.
302    *
303    * @param validation
304    *          the new validation
305    */
306   public void setValidation(boolean validation) {
307     this.validation = validation;
308   }
309 
310   /**
311    * Sets the entity resolver.
312    *
313    * @param resolver
314    *          the new entity resolver
315    */
316   public void setEntityResolver(EntityResolver resolver) {
317     this.entityResolver = resolver;
318   }
319 
320   /**
321    * Inner helper class that assists with building XPath paths.
322    * <p>
323    * Note: Currently this is a bit slow and could be optimized.
324    */
325   private static class Path {
326 
327     /** The node list. */
328     private List nodeList = new ArrayList<>();
329 
330     /**
331      * Instantiates a new path.
332      */
333     public Path() {
334     }
335 
336     /**
337      * Instantiates a new path.
338      *
339      * @param path
340      *          the path
341      */
342     public Path(String path) {
343       StringTokenizer parser = new StringTokenizer(path, "/", false);
344       while (parser.hasMoreTokens()) {
345         nodeList.add(parser.nextToken());
346       }
347     }
348 
349     /**
350      * Adds the.
351      *
352      * @param node
353      *          the node
354      */
355     public void add(String node) {
356       nodeList.add(node);
357     }
358 
359     /**
360      * Removes the.
361      */
362     public void remove() {
363       nodeList.remove(nodeList.size() - 1);
364     }
365 
366     @Override
367     public String toString() {
368       StringBuilder builder = new StringBuilder("/");
369       for (int i = 0; i < nodeList.size(); i++) {
370         builder.append(nodeList.get(i));
371         if (i < nodeList.size() - 1) {
372           builder.append("/");
373         }
374       }
375       return builder.toString();
376     }
377   }
378 
379 }