mi32/xml.h

Go to the documentation of this file.
00001 /*****************************************************************************
00002  *
00003  * \file xml.h <mi32/xml.h>
00004  * \brief Class wrappers around the libxml library
00005  *
00006  * \if NODOC
00007  * $Log: xml.h_v $
00008  * Revision 1.23  2003/09/15 13:49:56  fileserver!dwilliss
00009  * Doxygen
00010  *
00011  * Revision 1.22  2003/09/03 16:39:53  mju
00012  * Have GetText return bool.
00013  *
00014  * Revision 1.21  2003/08/20 19:25:48  mju
00015  * Add FindNext and STRUTF8 versions of GetProperty/GetText.
00016  * Make both GetProperty methods return true if property found.
00017  * Remove pointer-returning versions of GetProperty/GetText.
00018  *
00019  * Revision 1.20  2003/06/27 21:06:26  dwilliss
00020  * Added AddEntity methods
00021  *
00022  * Revision 1.19  2003/06/27 20:46:01  dwilliss
00023  * Moved to misystem.dll and exported.  Can't have inlines anymore as a
00024  * result
00025  *
00026  * Revision 1.15  2002/09/19 15:39:54  dwilliss
00027  * Changed XMLDOC's copy constructor and assignment operator to be public
00028  * instead of private.  They are now implemented.
00029  *
00030  * Revision 1.13  2002/08/30 16:40:03  dwilliss
00031  * Added private, unimplemented copy constructors and assignment operators
00032  * XMLDOC could theoretically be copied, but until implemented it's better to
00033  * not allow it than to use default assignment/copy constructor.
00034  *
00035  * Revision 1.12  2002/08/30 16:27:36  dwilliss
00036  * Added Unparse methods
00037  *
00038  * Revision 1.11  2002/08/23 16:09:31  dwilliss
00039  * Added GetElementByAttribute method
00040  *
00041  * Revision 1.10  2002/08/19 15:43:37  dwilliss
00042  * Added some methods to node to get property values as bool and number with
00043  * defaults if they aren't defined
00044  *
00045  * Revision 1.9  2002/05/07 16:23:53  dwilliss
00046  * Added a NewChild method that copies a node
00047  *
00048  * Revision 1.8  2002/03/22 20:23:59  dwilliss
00049  * Added Read/Write HTML
00050  *
00051  * Revision 1.6  2002/02/15 20:45:14  dwilliss
00052  * Added WriteHTML methods
00053  *
00054  * Revision 1.5  2002/02/13 23:15:58  dwilliss
00055  * Added Attach/Detach/Delete/Replace methods
00056  *
00057  * Revision 1.4  2001/12/20 23:47:09  dwilliss
00058  * Added a Parse method
00059  *
00060  * Revision 1.3  2001/12/12 20:25:16  dwilliss
00061  * Read/Write methods take FILEPATH now
00062  *
00063  * Revision 1.1  2001/12/07 18:51:46  dwilliss
00064  * Initial revision
00065  * \endif
00066  ****************************************************************************/
00067 
00068 #ifndef INC_MI32_XML_H
00069 #define INC_MI32_XML_H
00070 
00071 #ifndef __XML_TREE_H__     //!< libxml's include guards, not ours
00072 #include <libxml/tree.h>
00073 #endif
00074 #ifndef __XML_PARSER_H__
00075 #include <libxml/parser.h>
00076 #endif
00077 
00078 #ifndef INC_MI32_FILEPATH_H
00079 #include <mi32/filepath.h>
00080 #endif
00081 
00082 #ifndef INC_MI32_MISTRING_H
00083 #include <mi32/mistring.h>
00084 #endif
00085 
00086 #ifndef INC_MI32_UCSTRING_H
00087 #include <mi32/ucstring.h>
00088 #endif
00089 
00090 #ifndef GENERATING_DOXYGEN_OUTPUT
00091 class STRUTF8;
00092 #endif //!< GENERATING_DOXYGEN_OUTPUT
00093 
00094 #ifdef MISYSTEMDLL
00095    #define CLASSLIBEXPORT MI_DLLCLASSEXPORT
00096 #else
00097    #define CLASSLIBEXPORT MI_DLLCLASSIMPORT
00098 #endif
00099 
00100 #ifndef XML_CASE_SENSITIVE
00101 #define XML_CASE_SENSITIVE true
00102 #endif
00103 
00104 class XMLDOC;
00105 
00106 class XMLNAMESPACE : public _xmlNs {   // Interface-only view of a libxml _xmlNs; adds no data members.
00107    public:   //! Return this namespace's href field viewed as a C string.
00108       const char* GetHREF() const { return (reinterpret_cast<const char*>(this->href)); }
00109 
00110       //! Get the namespace prefix.  Note that NULL is a valid prefix and
00111       //! means the default namespace within the subtree.
00112       const char* GetPrefix() const { return (reinterpret_cast<const char*>(this->prefix)); }
00113       
00114       //! Return a copy of this namespace (made with xmlCopyNamespace).
00115       //! Caller should release it later with Free().
00116       XMLNAMESPACE* Copy() { return (static_cast<XMLNAMESPACE*>(xmlCopyNamespace(this))); }
00117 
00118       //! Free this namespace (calls xmlFreeNs).
00119       //! Should only be done if you allocated the namespace.
00120       //! Note, you can't use "delete" on a namespace pointer, as they're
00121       //! allocated by libxml using malloc()
00122       void Free() { xmlFreeNs(this); }
00123    private:
00124       #ifndef GENERATING_DOXYGEN_OUTPUT
00125       //! Private and unimplemented.  You can get pointers to these from
00126       //! libxml, but you cannot create or destroy them yourself
00127       XMLNAMESPACE();
00128       XMLNAMESPACE(const XMLNAMESPACE&);
00129       ~XMLNAMESPACE();
00130 
00131       friend struct _xmlNs;   //!< base struct as friend; avoids warning that destructor is private
00132       #endif // GENERATING_DOXYGEN_OUTPUT
00133    };
00134 
00135 
00136 //! This class is an interface-only class to allow us to treat
00137 //! an xmlNodePtr as an (XMLNODE*)
00138 //! Constructor/CopyConstructor/Destructor all unimplemented
00139 //! Memory allocation done through library calls, not new/delete
00140 class CLASSLIBEXPORT XMLNODE : public _xmlNode {
00141    public:
00142 
00143       //! Add a comment to the end of the node
00144       void AddComment (
00145          const UNICODE*                //!< Presumably the comment text (parameter is unnamed) -- TODO confirm
00146          );
00147       
00148       //! Append to the text of a node
00149       void AppendText (
00150          const UNICODE* text
00151          );
00152 
00153       //! Attach this node to an XML document. 
00154       //! This node is first detached from its current location and reattached
00155       //! as the last node of parent. NOTE:  You cannot use this function to
00156       //! move a node between documents.  To do that, you must first call
00157       //! Copy() to make a copy of the node owned by the destination document
00158       //! and attach the copy.
00159       void Attach (
00160          XMLNODE* parent
00161          );
00162 
00163       //! Returns a copy of this node. 
00164       //! The caller must either free it or Attach it to a document
00165       //! If the NewDoc parameter is specified (and not NULL) the copy
00166       //! will be owned by that document, but is not actually in its
00167       //! tree anywhere.
00168       XMLNODE* Copy (
00169          bool recursive = true,
00170          XMLDOC* NewDoc = NULL
00171          ) const;
00172 
00173       //! Unlink a node from its parent and delete it and all its children.
00174       void Delete ();
00175 
00176       //! Remove a node from its parent tree.  If you Detach it, you must
00177       //! eventually call Delete() on it or reattach it somewhere.
00178       //! The node is still considered part of its source document and
00179       //! when that document is deleted, the node's children will be too.
00180       void Detach();
00181       
00182       //! Find a child node of a given tag string
00183       XMLNODE* FindChild (
00184          const char* TagName           //!< Must be UTF8
00185          ) const;
00186 
00187       //! Find a child node of a given tag string and a given property
00188       XMLNODE* FindChild (
00189          const char* TagName,          //!< Must be UTF8
00190          const char* PropertyName,     //!< Must be UTF8
00191          const UNICODE* PropertyValue
00192          ) const;
00193 
00194       //! Find a child node of a given tag string and a given property
00195       XMLNODE* FindChild (
00196          const char* TagName,          //!< Must be UTF8
00197          const char* PropertyName,     //!< Must be UTF8
00198          const char* PropertyValue     //!< Must be UTF8
00199          ) const;
00200 
00201       //! Find next child, matching current or specific tag name and optional property name/value
00202       XMLNODE* FindNext (
00203          const char* TagName = 0,      //!< Tag name (UTF8), if NULL will use current node tag name
00204          const char* PropName = 0,     //!< Property name (UTF8), if NULL will match tag name only
00205          const char* PropValue = 0     //!< Property value (UTF8), if NULL will not check property value
00206          ) const;
00207 
00208       //! Finds a child node which has an attribute with the given value.
00209       //! This is recursive.
00210       XMLNODE* GetElementByAttribute (
00211          const char* AttributeName,
00212          const UNICODE* AttributeValue
00213          ) const;
00214 
00215       //! Finds a child node which has an attribute with the given value.
00216       //! This is recursive.
00217       XMLNODE* GetElementByAttribute (
00218          const char* AttributeName,
00219          const char* AttributeValue
00220          ) const;
00221 
00222       //! Return the first child of this node (NULL if no children)
00223       XMLNODE* GetFirstChild (
00224          ) const;
00225 
00226       //! Return the last child of this node (NULL if no children)
00227       XMLNODE* GetLastChild()  const;
00228 
00229       //! Return the parent node of this node (NULL for root node)
00230       XMLNODE* GetParent()  const;
00231 
00232       //! Return the previous sibling node (NULL if this is the first node)
00233       XMLNODE* GetPrevious() const;
00234       
00235       //! Get the name of this node.
00236       //! Note the name is actually UTF8, but 99.9% of the time is
00237       //! defined by some standards document that requires it to be
00238       //! limited to the ASCII subset
00239       const char* GetName() const;
00240 
00241       //! Return the next sibling node (NULL if this is the last node)
00242       XMLNODE* GetNext() const;
00243 
00244       //! Return the value of a property of this node as an MISTRING.
00245       //! Returned string is left unchanged if the property is not available
00246       //! @return true if property exists, false if not.
00247       bool GetProperty(
00248          const char* PropName,         //!< Property name (UTF8)
00249          MISTRING& str,                //!< Property value returned
00250          bool bCaseSensitive = true    //!< Presumably selects case-sensitive name matching -- TODO confirm
00251          ) const;
00252 
00253       //! Return the value of a property of this node as STRUTF8.
00254       //! Returned string is left unchanged if the property is not available
00255       //! @return true if property exists, false if not.
00256       bool GetProperty(
00257          const char* PropName,         //!< Property name (UTF8)
00258          STRUTF8& retstr,              //!< Property value returned
00259          bool bCaseSensitive = true    //!< Presumably selects case-sensitive name matching -- TODO confirm
00260          ) const;
00261 
00262       //! Return the value of a boolean property of this node.
00263       //! Returns true if the named property is "true" or "yes", false
00264       //! if it's "false" or "no", and the default if unspecified or
00265       //! none of the above
00266       bool GetPropertyBool (
00267          const char* PropName,         //!< Property name (UTF8)
00268          bool dft = false,             //!< Default returned if property is unspecified or unrecognized
00269          bool bCaseSensitive = true
00270          ) const;
00271 
00272       //! Return the value of a numeric property of this node.
00273       double GetPropertyNum (
00274          const char* PropName,         //!< Property name (UTF8)
00275          double dft = 0.0,             //!< Presumably returned when the property is missing (cf. GetPropertyBool) -- TODO confirm
00276          bool bCaseSensitive = true
00277          ) const;
00278 
00279       //! Get node text as MISTRING.
00280       //! @return true if text non-empty, false if not.
00281       bool GetText (
00282          MISTRING& str
00283          ) const;
00284 
00285       //! Get node text as STRUTF8.
00286       //! @return true if text non-empty, false if not.
00287       bool GetText (
00288          STRUTF8& str
00289          ) const;
00290 
00291       //! Determine if a node has a given property
00292       //! Note the name is actually UTF8, but 99.9% of the time is
00293       //! defined by some standards document that requires it to be
00294       //! limited to the ASCII subset
00295       bool HasProperty (
00296          const char* name, 
00297          bool bCaseSensitive=true
00298          ) const;
00299 
00300       //! Returns true if this node is empty
00301       bool IsEmpty() const;
00302 
00303       //! Returns true if this node is text-only
00304       bool IsText() const;
00305 
00306       //! Create a new, empty child node.
00307       XMLNODE* NewChild (
00308          const char* Tag,              //!< Node tag name (UTF8)
00309          XMLNAMESPACE* NameSpace = 0
00310          );
00311          
00312       //! Create a new child node with the given value.
00313       //! If any characters in "value" need to be escaped into entity
00314       //! references (eg: &amp;), this method will do it for you.
00315       XMLNODE* NewChild (
00316          const char* Tag,              //!< Node tag name (UTF8)
00317          const UNICODE* value,
00318          XMLNAMESPACE* NameSpace = 0
00319          );
00320          
00321       //! Copies a node into this document. The source node does not have
00322       //! to be from the same document.
00323       XMLNODE* NewChild (
00324          const XMLNODE* NodeToCopy
00325          );
00326 
00327       //! Create a new CDATA node.
00328       //! A CDATA node can contain any text, except the sequence "]]>"
00329       //! (not counting the quotes) which indicates the end of a CDATA
00330       //! section in XML.
00331       //! Note that the UNICODE version of this method doesn't have a
00332       //! "length" parameter for a reason.  The UNICODE string will be
00333       //! converted to UTF8, and the only length we're really interested
00334       //! in is the length of the UTF8
00335       XMLNODE* NewCDATAChild (
00336          const char* Tag,              //!< Node tag name (UTF8)
00337          const UNICODE* value,
00338          XMLNAMESPACE* NameSpace = 0
00339          );
00340 
00341       //! Create a new CDATA node.
00342       //! A CDATA node can contain any text, except the sequence "]]>"
00343       //! (not counting the quotes) which indicates the end of a CDATA
00344       //! section in XML.
00345       XMLNODE* NewCDATAChild (
00346          const char* Tag,              //!< Node tag name (UTF8)
00347          const UINT8 * value,          //!< UTF8
00348          int len = -1,                 //!< Length of UTF8 string (-1 to use strlen)
00349          XMLNAMESPACE* NameSpace = 0
00350          );
00351 
00352       //! Create a new child node containing text
00353       //! If any characters in "value" need to be escaped into entity
00354       //! references (eg: &amp;), this method will do it for you.
00355       //! This method exists because the xml library claims that
00356       //! xmlNewChild and xmlNewTextChild have some difference.
00357       //! I have yet to figure what that difference actually is.
00358       XMLNODE* NewTextChild (
00359          const char* Tag,              //!< Node tag name (UTF8)
00360          const UNICODE* value,
00361          XMLNAMESPACE* NameSpace = 0
00362          );
00363 
00364       //! Replace a node in an XMLDOC with this node.
00365       //! The NodeToReplace is removed from its parent and deleted.
00366       //! This node is grafted into the document in its place
00367       void Replace (
00368          XMLNODE* NodeToReplace
00369          );
00370 
00371       //! Set the namespace for this node
00372       void SetNamespace ( 
00373          XMLNAMESPACE* NameSpace
00374          );
00375 
00376       //! Set a property on a node.
00377       //! Note:  The value string may contain single or double quotes,
00378       //! but it is an error for it to contain both.
00379       void SetProperty (
00380          const char* PropName,         //!< Must be UTF8
00381          const UNICODE* value
00382          );
00383 
00384       //! Set a property on a node.
00385       //! Note:  The value string may contain single or double quotes,
00386       //! but it is an error for it to contain both.
00387       void SetProperty (
00388          const char* PropName,         //!< Must be UTF8
00389          const char* value             //!< Must be UTF8
00390          );
00391 
00392       //! Set the text of a node
00393       void SetText (
00394          const UNICODE* text
00395          );
00396 
00397       //! Unset a property on a node.
00398       void UnsetProperty (
00399          const char* PropName          //!< Property name (UTF8)
00400          );
00401 
00402    private:
00403       #ifndef GENERATING_DOXYGEN_OUTPUT
00404 
00405       //! Constructor/CopyConstructor/Destructor all unimplemented
00406       //! Memory allocation done through library calls, not new/delete
00407       XMLNODE();
00408       XMLNODE(const XMLNODE&);
00409       ~XMLNODE();
00410 
00411       //! Assignment operator (private and unimplemented)
00412       XMLNODE& operator= (
00413          const XMLNODE&
00414          );
00415 
00416    friend struct _xmlNode;   // base struct as friend; presumably silences the private-destructor warning -- TODO confirm
00417 
00418       //! NO DATA!  The actual structure is allocated by libxml and we can't
00419       //! expand it.  We just cast their pointer to an XMLNODE.
00420       #endif // GENERATING_DOXYGEN_OUTPUT
00421    };
00422 
00423 
00424 class CLASSLIBEXPORT XMLDOC {
00425    public:
00426       // Wrapper around a libxml document (see m_doc) with parse/read/write helpers.
00427       
00428       //! Constructor.  Constructs an empty document
00429       XMLDOC();
00430 
00431       //! Constructor.  Constructs an XML document given one read into memory.
00432       //! Note:  UNICODE string gets converted to UTF8 before parsing.  If you
00433       //! are reading it in UTF8 to begin with, converting it to Unicode and
00434       //! constructing it that way is a waste of time.  Use the constructor
00435       //! that takes UTF8 instead.
00436       XMLDOC (
00437          const UNICODE* xmlbuffer
00438          );
00439 
00440       //! Constructor.  Constructs an XML document given one read into memory
00441       //! (Buffer must be in UTF8 encoding)
00442       XMLDOC (
00443          const UINT8* xmlbuffer
00444          );
00445 
00446       //! Copy constructor. 
00447       //! Note, copy constructor is expensive.  It has to copy the whole
00448       //! document tree in memory.
00449       XMLDOC (
00450          const XMLDOC&
00451          );
00452 
00453       //! Destructor
00454       ~XMLDOC();
00455       // NOTE(review): ~XMLDOC() is non-virtual although OnDocReplaced() is virtual; confirm no delete through an XMLDOC*.
00456       //! Assignment operator 
00457       //! Note, assignment is expensive.  It has to copy the whole
00458       //! document tree in memory.
00459       XMLDOC& operator= (
00460          const XMLDOC&
00461          );
00462 
00463       //! Add an entity to the XML document definition.
00464       //! Allows you to register non-standard entities such as \&deg;
00465       //! This method is the generic case, multiple-character entity.
00466       void AddEntity (
00467          const char* name, //!< Without the "&" or ";"
00468          const UNICODE* content
00469          );
00470 
00471       //! Add an entity to the XML document definition.
00472       //! Allows you to register non-standard entities such as \&deg;
00473       //! This method is the simple case. A single character entity.
00474       void AddEntity (
00475          const char* name, //!< Without the "&" or ";"
00476          UNICODE content
00477          );
00478 
00479       //! Reset the document to a single, empty <root> node.
00480       void Clear();
00481 
00482       //! Returns a string holding the error message from the last call
00483       //! to Parse or Read.
00484       const MISTRING& GetErrorMessage (
00485          ) const;
00486 
00487       //! Read an HTML file.
00488       //! Similar to Read() but HTML has more relaxed rules
00489       //! The file must be in UTF8 encoding
00490       ERRVALUE ReadHTML (
00491          const FILEPATH& filepath
00492          );
00493 
00494       //! Read an HTML file.
00495       //! Similar to Read() but HTML has more relaxed rules
00496       //! The file must be in UTF8 encoding
00497       ERRVALUE ReadHTML (
00498          const UNICODE* filename
00499          );
00500       
00501       //! Read an XML file
00502       //! The file must be in UTF8 encoding
00503       ERRVALUE Read (
00504          const FILEPATH& filepath
00505          );
00506 
00507       //! Read an XML file (forwards to the FILEPATH overload)
00508       //! The file must be in UTF8 encoding
00509       ERRVALUE Read (
00510          const UNICODE* filename
00511          ) {
00512          return (Read(FILEPATH(filename)));
00513          }
00514       
00515       //! Parse an XML document already read into memory 
00516       //! (Buffer must be in UTF8 encoding)
00517       ERRVALUE Parse (
00518          const UINT8* xmlbuffer
00519          );
00520 
00521       //! Convert the in-memory structure to an MISTRING.
00522       void Unparse (
00523          MISTRING& string,
00524          bool bIndentTreeOutput = true
00525          );
00526       //! Set the document's compression value. NOTE(review): range/meaning not documented here; presumably a libxml compression level -- confirm.
00527       void SetCompression(
00528          int compression
00529          );
00530 
00531       //! Convert the in-memory structure to an MISTRING using HTML formatting rules.
00532       void UnparseHTML (
00533          MISTRING& string,
00534          bool bIndentTreeOutput = true
00535          );
00536 
00537       //! Write out to an XML file
00538       //! The resulting file will be in UTF8 encoding
00539       ERRVALUE Write (
00540          const FILEPATH& filepath
00541          ) const;
00542 
00543       //! Write out to an XML file
00544       //! The resulting file will be in UTF8 encoding
00545       ERRVALUE Write (
00546          const UNICODE* filename
00547          ) const;
00548 
00549       //! Write out to an XML file as an HTML file
00550       //! Same as Write, but HTML has slightly different formatting rules
00551       ERRVALUE WriteHTML (
00552          const FILEPATH& filepath
00553          ) const;
00554 
00555       //! Write out to an XML file as an HTML file
00556       //! Same as Write, but HTML has slightly different formatting rules
00557       ERRVALUE WriteHTML (
00558          const UNICODE* filename
00559          ) const;
00560       //! Presumably exposes the libxml document. NOTE(review): returns xmlDocPtr* although m_doc is an xmlDocPtr -- confirm intended.
00561       xmlDocPtr* GetDocPtr() const;
00562 
00563       //! Get the root node of the document.  
00564       XMLNODE* GetRootNode (
00565          ) const;
00566 
00567       protected:
00568          //! Will be called by Read and Parse methods to inform derived class
00569          //! that the whole document has been changed out from under it.
00570          virtual void OnDocReplaced();
00571 
00572       private:
00573          #ifndef GENERATING_DOXYGEN_OUTPUT
00574          // Helper built from an XMLDOC*; holds an MISTRING reference. Presumably routes libxml errors into m_ErrorMessage -- TODO confirm.
00575          class ERRORTRAP {
00576             public:
00577                ERRORTRAP(XMLDOC*);
00578                ~ERRORTRAP();
00579             private:
00580                MISTRING& m_string;
00581 
00582                static void MyErrorFunc(void*, const char*, ...);
00583             };
00584 
00585          xmlDocPtr m_doc;            // libxml document pointer wrapped by this class
00586          MISTRING m_ErrorMessage;    // presumably the text returned by GetErrorMessage() -- TODO confirm
00587 
00588          friend class XMLNODE;
00589          friend class ERRORTRAP;
00590       #endif // GENERATING_DOXYGEN_OUTPUT
00591 
00592    };
00593 
00594 #undef CLASSLIBEXPORT
00595 
00596 #endif
00597  

Generated on Thu Aug 12 06:19:13 2004 for TNTsdk by doxygen 1.3.4-20031026