summaryrefslogtreecommitdiff
path: root/gpr/source/lib/xmp_core/XMLParserAdapter.hpp
blob: ff9b8775262d7406aeb0c295566f3e8f5cd794c2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#ifndef __XMLParserAdapter_hpp__
#define __XMLParserAdapter_hpp__

// =================================================================================================
// Copyright 2005 Adobe Systems Incorporated
// All Rights Reserved.
//
// NOTICE:  Adobe permits you to use, modify, and distribute this file in accordance with the terms
// of the Adobe license agreement accompanying it.
// =================================================================================================

#include "public/include/XMP_Environment.h"	// ! Must be the first #include!
#include "public/include/XMP_Const.h"

#include "XMP_LibUtils.hpp"

#include <string>
#include <vector>

// =================================================================================================
// XML_Node details
//
// The XML_Nodes are used only during the XML/RDF parsing process. This presently uses an XML parser
// to create an XML tree, then a recursive descent RDF recognizer to build the corresponding XMP.
// This makes it easier to swap XML parsers and provides a clean separation of XML and RDF issues.
// The overall parsing would be faster and use less memory if the RDF recognition were done on the
// fly using a state machine. But it was much easier to write the recursive descent version. The
// current implementation is pretty fast in absolute terms, so being faster might not be crucial.
//
// Like the XMP tree, the XML tree contains vectors of pointers for down links, and offspring have
// a pointer to their parent. Unlike the XMP tree, this is an exact XML document tree. There are no
// introduced top level namespace nodes or rearrangement of the nodes..
//
// The exact state of namespaces can vary during the XML parsing, depending on the parser in use.
// By the time the RDF recognition is done though, the namespaces must be normalized. All of the
// used namespaces must be registered, this is done automatically if necessary. All of the "live"
// namespace prefixes will be unique. The ns field of an XML_Node is the namespace URI, the name
// field contains a qualified name (prefix:local). This includes default namespace mapping, the
// URI and prefix will be missing only for elements and attributes in no namespace.

class XML_Node;

typedef XML_Node *	XML_NodePtr;	// Handy for things like: XML_Node * a, b; - b is XML_Node, not XML_Node*!

enum { kRootNode = 0, kElemNode = 1, kAttrNode = 2, kCDataNode = 3, kPINode = 4 };

#define IsWhitespaceChar(ch)	( ((ch) == ' ') || ((ch) == 0x09) || ((ch) == 0x0A) || ((ch) == 0x0D) )

typedef std::vector<XML_NodePtr>		XML_NodeVector;
typedef XML_NodeVector::iterator		XML_NodePos;
typedef XML_NodeVector::const_iterator	XML_cNodePos;

#if 0	// Pattern for iterating over the children or attributes:
	for ( size_t xxNum = 0, xxLim = _node_->_offspring_.size(); xxNum < xxLim; ++xxNum ) {
		const XML_NodePtr _curr_ = _node_->_offspring_[xxNum];
	}
#endif

class XML_Node {
public:

	// Intended for lightweight internal use. Clients are expected to use the data directly.
	
	XMP_Uns8		kind;
	std::string		ns, name, value;
	size_t          nsPrefixLen;
	XML_NodePtr		parent;
	XML_NodeVector	attrs;
	XML_NodeVector	content;
	
	bool IsWhitespaceNode() const;
	bool IsLeafContentNode() const;	// An empty element or one with a single character data child node.
	bool IsEmptyLeafNode() const;

	XMP_StringPtr GetAttrValue ( XMP_StringPtr attrName ) const;
	void SetAttrValue ( XMP_StringPtr attrName, XMP_StringPtr attrValue );

	XMP_StringPtr GetLeafContentValue() const;
	std::string* GetLeafContentPtr() const;
	void SetLeafContentValue ( XMP_StringPtr value );
	
	size_t CountNamedElements ( XMP_StringPtr nsURI, XMP_StringPtr localName ) const;	// Number of child elements with this name.
	XML_NodePtr GetNamedElement ( XMP_StringPtr nsURI, XMP_StringPtr localName, size_t which = 0 );
	
	void Dump ( std::string * buffer );
	void Serialize ( std::string * buffer );
	
	void RemoveAttrs();
	void RemoveContent();
	void ClearNode();

	XML_Node ( XML_NodePtr _parent, XMP_StringPtr _name, XMP_Uns8 _kind )
		: kind(_kind), name(_name), parent(_parent), nsPrefixLen(0) {};

	XML_Node ( XML_NodePtr _parent, const std::string & _name, XMP_Uns8 _kind )
		: kind(_kind), name(_name), parent(_parent), nsPrefixLen(0) {};

	virtual ~XML_Node() { RemoveAttrs(); RemoveContent(); };

private:

	XML_Node() : kind(0), parent(0)	{};	// ! Hidden to make sure parent pointer is always set.

};

// =================================================================================================
// Abstract base class for XML parser adapters used by the XMP toolkit.

enum { kXMLPendingInputMax = 16 };

class XMLParserAdapter {
public:

	XMLParserAdapter() : tree(0,"",kRootNode), rootNode(0), rootCount(0),
	                     charEncoding(XMP_OptionBits(-1)), pendingCount(0),
	                     errorCallback(0)
	{
		#if XMP_DebugBuild
			parseLog = 0;
		#endif
	};

	virtual ~XMLParserAdapter() {};
	
	virtual void ParseBuffer ( const void * buffer, size_t length, bool last ) = 0;
	
	virtual void SetErrorCallback ( GenericErrorCallback * ec )
		{ this->errorCallback = ec; };

	virtual void NotifyClient ( XMP_ErrorSeverity severity, XMP_Error & error )
	{
		if (this->errorCallback)
			this->errorCallback->NotifyClient( severity, error );
	}

	XML_Node		tree;
	XML_NodeVector	parseStack;
	XML_NodePtr		rootNode;
	size_t			rootCount;

	XMP_OptionBits	charEncoding;
	size_t          pendingCount;
	unsigned char	pendingInput[kXMLPendingInputMax];	// Buffered input for character encoding checks.

	GenericErrorCallback * errorCallback;	// Set if the relevant XMPCore or XMPFiles object has one.

	#if XMP_DebugBuild
		FILE * parseLog;
	#endif

};

// =================================================================================================

#endif	// __XMLParserAdapter_hpp__