00001 /***************************************************************************** 00002 * 00003 * \file xml.h <mi32/xml.h> 00004 * \brief Class wrappers around the libxml library 00005 * 00006 * \if NODOC 00007 * $Log: xml.h_v $ 00008 * Revision 1.23 2003/09/15 13:49:56 fileserver!dwilliss 00009 * Doxygen 00010 * 00011 * Revision 1.22 2003/09/03 16:39:53 mju 00012 * Have GetText return bool. 00013 * 00014 * Revision 1.21 2003/08/20 19:25:48 mju 00015 * Add FindNext and STRUTF8 versions of GetProperty/GetText. 00016 * Make both GetProperty methods return true if property found. 00017 * Remove pointer-returning versions of GetProperty/GetText. 00018 * 00019 * Revision 1.20 2003/06/27 21:06:26 dwilliss 00020 * Added AddEntity methods 00021 * 00022 * Revision 1.19 2003/06/27 20:46:01 dwilliss 00023 * Moved to misystem.dll and exported. Can't have inlines anymore as a 00024 * result 00025 * 00026 * Revision 1.15 2002/09/19 15:39:54 dwilliss 00027 * Changed XMLDOC's copy constructor and assignment operator to be public 00028 * instead of private. They are now implemented. 00029 * 00030 * Revision 1.13 2002/08/30 16:40:03 dwilliss 00031 * Added private, unimplemented copy constructors and assignment operators 00032 * XMLDOC could theoretically be copied, but until implemented it's better to 00033 * not allow it than to use default assignment/copy constructor. 00034 * 00035 * Revision 1.12 2002/08/30 16:27:36 dwilliss 00036 * Added Unparse methods 00037 * 00038 * Revision 1.11 2002/08/23 16:09:31 dwilliss 00039 * Added GetElementByAttribute method 00040 * 00041 * Revision 1.10 2002/08/19 15:43:37 dwilliss 00042 * Added some methods to node to get property values as bool and number with 00043 * defaults if they aren't defined 00044 * 00045 * Revision 1.9 2002/05/07 16:23:53 dwilliss 00046 * Added a NewChild method that copies a node 00047 * 00048 * Revision 1.8 2002/03/22 20:23:59 dwilliss 00049 * Added Read/Write HTML 00050 * 00051 * Revision 1.6 2002/02/15 20:45:14 dwilliss 00052 * Added WriteHTML methods 00053 * 00054 * Revision 1.5 2002/02/13 23:15:58 dwilliss 00055 * Added Attach/Detach/Delete/Replace methods 00056 * 00057 * Revision 1.4 2001/12/20 23:47:09 dwilliss 00058 * Added a Parse method 00059 * 00060 * Revision 1.3 2001/12/12 20:25:16 dwilliss 00061 * Read/Write methods take FILEPATH now 00062 * 00063 * Revision 1.1 2001/12/07 18:51:46 dwilliss 00064 * Initial revision 00065 * \endif 00066 ****************************************************************************/ 00067 00068 #ifndef INC_MI32_XML_H 00069 #define INC_MI32_XML_H 00070 00071 #ifndef __XML_TREE_H__ //!< libxml's include guards, not ours 00072 #include <libxml/tree.h> 00073 #endif 00074 #ifndef __XML_PARSER_H__ 00075 #include <libxml/parser.h> 00076 #endif 00077 00078 #ifndef INC_MI32_FILEPATH_H 00079 #include <mi32/filepath.h> 00080 #endif 00081 00082 #ifndef INC_MI32_MISTRING_H 00083 #include <mi32/mistring.h> 00084 #endif 00085 00086 #ifndef INC_MI32_UCSTRING_H 00087 #include <mi32/ucstring.h> 00088 #endif 00089 00090 #ifndef GENERATING_DOXYGEN_OUTPUT 00091 class STRUTF8; 00092 #endif //!< GENERATING_DOXYGEN_OUTPUT 00093 00094 #ifdef MISYSTEMDLL 00095 #define CLASSLIBEXPORT MI_DLLCLASSEXPORT 00096 #else 00097 #define CLASSLIBEXPORT MI_DLLCLASSIMPORT 00098 #endif 00099 00100 #ifndef XML_CASE_SENSITIVE 00101 #define XML_CASE_SENSITIVE true 00102 #endif 00103 00104 class XMLDOC; 00105 00106 class XMLNAMESPACE : public _xmlNs { 00107 public: 00108 const char* GetHREF() const { return (reinterpret_cast<const char*>(this->href)); } 00109 00110 //! Get the namespace prefix. Note that NULL is a valid prefix and 00111 //! means the default namespace within the subtree. 00112 const char* GetPrefix() const { return (reinterpret_cast<const char*>(this->prefix)); } 00113 00114 //! Return a copy of this namespace. Caller should free it later. 00115 XMLNAMESPACE* Copy() { return (static_cast<XMLNAMESPACE*>(xmlCopyNamespace(this))); } 00116 00117 //! Free this namespace. 00118 //! Should only be done if you allocated the namespace. 00119 //! Note, you can't use "delete" on a namespace pointer, as they're 00120 //! allocated by libxml using malloc() 00121 void Free() { xmlFreeNs(this); } 00122 00123 private: 00124 #ifndef GENERATING_DOXYGEN_OUTPUT 00125 //! Private and unimplemented. You can get pointers to these from 00126 //! libxml, but you cannot create or destroy them yourself 00127 XMLNAMESPACE(); 00128 XMLNAMESPACE(const XMLNAMESPACE&); 00129 ~XMLNAMESPACE(); 00130 00131 friend struct _smlNs; //!< avoids warning that destructor is private 00132 #endif // GENERATING_DOXYGEN_OUTPUT 00133 }; 00134 00135 00136 //! This class is an interface-only class to allow us to treat 00137 //! an xmlNodePtr as a (NODE*) 00138 //! Constructor/CopyConstructor/Destructor all unimplemented 00139 //! Memory allocation done through library calls, not new/delete 00140 class CLASSLIBEXPORT XMLNODE : public _xmlNode { 00141 public: 00142 00143 //! Add a comment to the end of the node 00144 void AddComment ( 00145 const UNICODE* 00146 ); 00147 00148 //! Append to the text of a node 00149 void AppendText ( 00150 const UNICODE* text 00151 ); 00152 00153 //! Attach this node to an XML document. 00154 //! This node is first detached from its current location and reattached 00155 //! as the last node of parent. NOTE: You cannot use this function to 00156 //! move a node between documents. To do that, you must first call 00157 //! Copy() to make a copy of the node owned by the destination document 00158 //! and attach the copy. 00159 void Attach ( 00160 XMLNODE* parent 00161 ); 00162 00163 //! Returns a copy of this node. 00164 //! The caller must either free it or Attach it to a document 00165 //! If the NewDoc parameter is specified (and not NULL) the copy 00166 //! will be owned by that document, but is not actually in its 00167 //! tree anywhere. 00168 XMLNODE* Copy ( 00169 bool recursive = true, 00170 XMLDOC* NewDoc = NULL 00171 ) const; 00172 00173 //! Unlink a node from its parent and delete it and all its children. 00174 void Delete (); 00175 00176 //! Remove a node from it's parent tree. If you Detach it, you must 00177 //! eventually call Delete() on it or reattach it somewhere. 00178 //! The node is still considered part of its source document and 00179 //! when that document is deleted, the node's children will be too. 00180 void Detach(); 00181 00182 //! Find a child node of a given tag string 00183 XMLNODE* FindChild ( 00184 const char* TagName //!< Must be UTF8 00185 ) const; 00186 00187 //! Find a child node of a given tag string and a given property 00188 XMLNODE* FindChild ( 00189 const char* TagName, //!< Must be UTF8 00190 const char* PropertyName, //!< Must be UTF8 00191 const UNICODE* PropertyValue 00192 ) const; 00193 00194 //! Find a child node of a given tag string and a given property 00195 XMLNODE* FindChild ( 00196 const char* TagName, //!< Must be UTF8 00197 const char* PropertyName, //!< Must be UTF8 00198 const char* PropertyValue //!< Must be UTF8 00199 ) const; 00200 00201 //! Find next child, matching current or specific tag name and optional property name/value 00202 XMLNODE* FindNext ( 00203 const char* TagName = 0, //!< Tag name (UTF8), if NULL will use current node tag name 00204 const char* PropName = 0, //!< Property name (UTF8), if NULL will match tag name only 00205 const char* PropValue = 0 //!< Property value (UTF8), if NULL will not check property value 00206 ) const; 00207 00208 //! Finds a child node which has an attribute with the given value. 00209 //! This is recursive. 00210 XMLNODE* GetElementByAttribute ( 00211 const char* AttributeName, 00212 const UNICODE* AttributeValue 00213 ) const; 00214 00215 //! Finds a child node which has an attribute with the given value. 00216 //! This is recursive. 00217 XMLNODE* GetElementByAttribute ( 00218 const char* AttributeName, 00219 const char* AttributeValue 00220 ) const; 00221 00222 //! Return the first child of this node (NULL if no children) 00223 XMLNODE* GetFirstChild ( 00224 ) const; 00225 00226 //! Return the last child of this node (NULL if no children) 00227 XMLNODE* GetLastChild() const; 00228 00229 //! Return the parent node of this node (NULL for root node) 00230 XMLNODE* GetParent() const; 00231 00232 //! Return the previous sibling node (NULL if this is the first node) 00233 XMLNODE* GetPrevious() const; 00234 00235 //! Get the name of this node. 00236 //! Note the name is actually UTF8, but 99.9% of the time is 00237 //! defined by some standards document that requires it to be 00238 //! limited to the ASCII subset 00239 const char* GetName() const; 00240 00241 //! Return the next sibling node (NULL if this is the last node) 00242 XMLNODE* GetNext() const; 00243 00244 //! Return the value of a property of this node as an MISTRING. 00245 //! Returned string is left unchanged if the property is not available 00246 //! @return true if property exists, false if not. 00247 bool GetProperty( 00248 const char* PropName, //!< Property name (UTF8) 00249 MISTRING& str, 00250 bool bCaseSensitive = true 00251 ) const; 00252 00253 //! Return the value of a property of this node as STRUTF8. 00254 //! Returned string is left unchanged if the property is not available 00255 //! @return true if property exists, false if not. 00256 bool GetProperty( 00257 const char* PropName, //!< Property name (UTF8) 00258 STRUTF8& retstr, //!< Property value returned 00259 bool bCaseSensitive = true 00260 ) const; 00261 00262 //! Return the value of a boolean property of this node. 00263 //! Returns true if the named property is "true" or "yes", false 00264 //! if it's "false" or "no", and the default if unspecified or 00265 //! none of the above 00266 bool GetPropertyBool ( 00267 const char* PropName, //!< Property name (UTF8) 00268 bool dft = false, 00269 bool bCaseSensitive = true 00270 ) const; 00271 00272 //! Return the value of a numeric property of this node. 00273 double GetPropertyNum ( 00274 const char* PropName, //!< Property name (UTF8) 00275 double dft = 0.0, 00276 bool bCaseSensitive = true 00277 ) const; 00278 00279 //! Get node text as MISTRING. 00280 //! @return true if text non-empty, false if not. 00281 bool GetText ( 00282 MISTRING& str 00283 ) const; 00284 00285 //! Get node text as STRUTF8. 00286 //! @return true if text non-empty, false if not. 00287 bool GetText ( 00288 STRUTF8& str 00289 ) const; 00290 00291 //! Determine if a node has a given property 00292 //! Note the name is actually UTF8, but 99.9% of the time is 00293 //! defined by some standards document that requires it to be 00294 //! limited to the ASCII subset 00295 bool HasProperty ( 00296 const char* name, 00297 bool bCaseSensitive=true 00298 ) const; 00299 00300 //! Returns true if this node is empty 00301 bool IsEmpty() const; 00302 00303 //! Returns true if this node is text-only 00304 bool IsText() const; 00305 00306 //! Create a new, empty child node. 00307 XMLNODE* NewChild ( 00308 const char* Tag, //!< Node tag name (UTF8) 00309 XMLNAMESPACE* NameSpace = 0 00310 ); 00311 00312 //! Create a new, empty child node. 00313 //! If any characters in "value" need to be escaped into entity 00314 //! references (eg: &), this method will do it for you. 00315 XMLNODE* NewChild ( 00316 const char* Tag, //!< Node tag name (UTF8) 00317 const UNICODE* value, 00318 XMLNAMESPACE* NameSpace = 0 00319 ); 00320 00321 //! Copies a node into this document. The source node does not have 00322 //! to be from the same document. 00323 XMLNODE* NewChild ( 00324 const XMLNODE* NodeToCopy 00325 ); 00326 00327 //! Create a new CDATA node. 00328 //! A CDATA node can contain any text, except the sequence "]]>" 00329 //! (not counting the quotes) which indicates the end of a CDATA 00330 //! section in XML. 00331 //! Note that the UNICODE version of this method doesn't have a 00332 //! "length" parameter for a reason. The UNICODE string will be 00333 //! converted to UTF8, and the only length we're really interested 00334 //! in is the length of the UTF8 00335 XMLNODE* NewCDATAChild ( 00336 const char* Tag, //!< Node tag name (UTF8) 00337 const UNICODE* value, 00338 XMLNAMESPACE* NameSpace = 0 00339 ); 00340 00341 //! Create a new CDATA node. 00342 //! A CDATA node can contain any text, except the sequence "]]>" 00343 //! (not counting the quotes) which indicates the end of a CDATA 00344 //! section in XML. 00345 XMLNODE* NewCDATAChild ( 00346 const char* Tag, //!< Node tag name (UTF8) 00347 const UINT8 * value, //!< UTF8 00348 int len = -1, //!< Length of UTF8 string (-1 to use strlen) 00349 XMLNAMESPACE* NameSpace = 0 00350 ); 00351 00352 //! Create a new child node containing text 00353 //! If any characters in "value" need to be escaped into entity 00354 //! references (eg: &), this method will do it for you. 00355 //! This method exists because the xml library claims that 00356 //! xmlNewChild and xmlNewTextChild have some difference. 00357 //! I have yet to figure what that difference actually is. 00358 XMLNODE* NewTextChild ( 00359 const char* Tag, //!< Node tag name (UTF8) 00360 const UNICODE* value, 00361 XMLNAMESPACE* NameSpace = 0 00362 ); 00363 00364 //! Replace a node in an XMLDOC with this node. 00365 //! The NodeToReplace is removed from its parent and deleted. 00366 //! This node is grafted into the document in its place 00367 void Replace ( 00368 XMLNODE* NodeToReplace 00369 ); 00370 00371 //! Set the namespace for this node 00372 void SetNamespace ( 00373 XMLNAMESPACE* NameSpace 00374 ); 00375 00376 //! Set a property on a node. 00377 //! Note: The value string may contain single or double quotes, 00378 //! but it is an error for it to contain both. 00379 void SetProperty ( 00380 const char* PropName, //!< Must be UTF8 00381 const UNICODE* value 00382 ); 00383 00384 //! Set a property on a node. 00385 //! Note: The value string may contain single or double quotes, 00386 //! but it is an error for it to contain both. 00387 void SetProperty ( 00388 const char* PropName, //!< Must be UTF8 00389 const char* value //!< Must be UTF8 00390 ); 00391 00392 //! Set the text of a node 00393 void SetText ( 00394 const UNICODE* text 00395 ); 00396 00397 //! Unset a property on a node. 00398 void UnsetProperty ( 00399 const char* PropName //!< Property name (UTF8) 00400 ); 00401 00402 private: 00403 #ifndef GENERATING_DOXYGEN_OUTPUT 00404 00405 //! Constructor/CopyConstructor/Destructor all unimplemented 00406 //! Memory allocation done through library calls, not new/delete 00407 XMLNODE(); 00408 XMLNODE(const XMLNODE&); 00409 ~XMLNODE(); 00410 00411 //! Assignment operator (private and unimplemented) 00412 XMLNODE& operator= ( 00413 const XMLNODE& 00414 ); 00415 00416 friend struct _xmlNode; 00417 00418 //! NO DATA! The actual structure is allocated by libxml and we can't 00419 //! expand it. We just cast their pointer to an XMLNODE. 00420 #endif // GENERATING_DOXYGEN_OUTPUT 00421 }; 00422 00423 00424 class CLASSLIBEXPORT XMLDOC { 00425 public: 00426 00427 00428 //! Constructor. Constructs an empty document 00429 XMLDOC(); 00430 00431 //! Constructor. Constructs an XML document given one read into memory. 00432 //! Note: UNICODE string gets converted to UTF8 before parsing. If you 00433 //! are reading it in UTF8 to begin with, converting it to Unicode and 00434 //! constructing it that way is a waste of time. Use the constructor 00435 //! that takes UTF8 instead. 00436 XMLDOC ( 00437 const UNICODE* xmlbuffer 00438 ); 00439 00440 //! Constructor. Constructs an XML document given one read into memory 00441 //! (Buffer must be in UTF8 encoding) 00442 XMLDOC ( 00443 const UINT8* xmlbuffer 00444 ); 00445 00446 //! Copy constructor. 00447 //! Note, copy constructor is expensive. It has to copy the whole 00448 //! document tree in memory. 00449 XMLDOC ( 00450 const XMLDOC& 00451 ); 00452 00453 //! Destructor 00454 ~XMLDOC(); 00455 00456 //! Assignment operator 00457 //! Note, asignment is expensive. It has to copy the whole 00458 //! document tree in memory. 00459 XMLDOC& operator= ( 00460 const XMLDOC& 00461 ); 00462 00463 //! Add an entity to the XML document defintion. 00464 //! Allows you to register non-standard entities such as \° 00465 //! This method is the generic case, multiple-character entity. 00466 void AddEntity ( 00467 const char* name, //!< Without the "&" or ";" 00468 const UNICODE* content 00469 ); 00470 00471 //! Add an entity to the XML document defintion. 00472 //! Allows you to register non-standard entities such as \° 00473 //! This method is the simple case. A single character entity. 00474 void AddEntity ( 00475 const char* name, //!< Without the "&" or ";" 00476 UNICODE content 00477 ); 00478 00479 //! Reset the document to a single, empty <root> node. 00480 void Clear(); 00481 00482 //! Returns a string holding the error message from the last call 00483 //! to Parse or Read. 00484 const MISTRING& GetErrorMessage ( 00485 ) const; 00486 00487 //! Read an HTML file. 00488 //! Similar to Read() but HTML has more relaxed rules 00489 //! The file must be in UTF8 encoding 00490 ERRVALUE ReadHTML ( 00491 const FILEPATH& filepath 00492 ); 00493 00494 //! Read an HTML file 00495 //! Similar to Read() but HTML has more relaxed rules 00496 //! The file must be in UTF8 encoding 00497 ERRVALUE ReadHTML ( 00498 const UNICODE* filename 00499 ); 00500 00501 //! Read an XML file 00502 //! The file must be in UTF8 encoding 00503 ERRVALUE Read ( 00504 const FILEPATH& filepath 00505 ); 00506 00507 //! Read an XML file 00508 //! The file must be in UTF8 encoding 00509 ERRVALUE Read ( 00510 const UNICODE* filename 00511 ) { 00512 return (Read(FILEPATH(filename))); 00513 } 00514 00515 //! Parse an XML document already read into memory 00516 //! (Buffer must be in UTF8 encoding) 00517 ERRVALUE Parse ( 00518 const UINT8* xmlbuffer 00519 ); 00520 00521 //! Convert the in-memory structure to an MISTRING. 00522 void Unparse ( 00523 MISTRING& string, 00524 bool bIndentTreeOutput = true 00525 ); 00526 00527 void SetCompression( 00528 int compression 00529 ); 00530 00531 //! Convert the in-memory structure to an MISTRING using HTML formatting rules. 00532 void UnparseHTML ( 00533 MISTRING& string, 00534 bool bIndentTreeOutput = true 00535 ); 00536 00537 //! Write out to an XML file 00538 //! The resulting file will be in UTF8 encoding 00539 ERRVALUE Write ( 00540 const FILEPATH& filepath 00541 ) const; 00542 00543 //! Write out to an XML file 00544 //! The resulting file will be in UTF8 encoding 00545 ERRVALUE Write ( 00546 const UNICODE* filename 00547 ) const; 00548 00549 //! Write out to an XML file as an HTML file 00550 //! Same as Write, but HTML has slightly different formatting rules 00551 ERRVALUE WriteHTML ( 00552 const FILEPATH& filepath 00553 ) const; 00554 00555 //! Write out to an XML file 00556 //! Same as Write, but HTML has slightly different formatting rules 00557 ERRVALUE WriteHTML ( 00558 const UNICODE* filename 00559 ) const; 00560 00561 xmlDocPtr* GetDocPtr() const; 00562 00563 //! Get the root node of the document. 00564 XMLNODE* GetRootNode ( 00565 ) const; 00566 00567 protected: 00568 //! Will be called by Read and Parse methods to inform derived class 00569 //! that the whole document has been changed out from under it. 00570 virtual void OnDocReplaced(); 00571 00572 private: 00573 #ifndef GENERATING_DOXYGEN_OUTPUT 00574 00575 class ERRORTRAP { 00576 public: 00577 ERRORTRAP(XMLDOC*); 00578 ~ERRORTRAP(); 00579 private: 00580 MISTRING& m_string; 00581 00582 static void MyErrorFunc(void*, const char*, ...); 00583 }; 00584 00585 xmlDocPtr m_doc; 00586 MISTRING m_ErrorMessage; 00587 00588 friend class XMLNODE; 00589 friend class ERRORTRAP; 00590 #endif // GENERATING_DOXYGEN_OUTPUT 00591 00592 }; 00593 00594 #undef CLASSLIBEXPORT 00595 00596 #endif 00597
1.3.4-20031026