Jeremy Huntwork wrote:
> On Sun, Dec 10, 2006 at 12:06:07PM +0100, M.Canales.es
wrote:
>> As a POC looks very good, the parsing speed is
awesome
> Thanks.
As has been the speed of the POC code, now over half a year old,
posted elsewhere on this list by
yours truly.
>> Of course there are still a lot of issues that need to be
>> fixed, like the extra new line in replaceable tags, handling
>> "nodump" commands, separating the scripts by build phases,
>> and the like, but it is a nice starting point.
> You're right, there is a lot to do with it. I know that
George M. was
> still doing a lot of work on his full-featured parser
and build-tool.
As I have emailed you and Manuel Es. some pieces of code from
a rather _aged_
set gathering ideas from testcases, I am posting the same concept
headers to the
list. We had a discussion on freenode yesterday about
this (in the last
24h in any case).
> So this little bit of code may eventually be eclipsed,
but it's fun to play
> with. I'll try to clean the code up a bit more and work
on some other
> features as I have time. And of course, as always,
anyone's welcome to
> improve on it.
I am glad that Jeremy was able to incorporate these ideas so
easily into his
alfs-POC code. As with past posts in this list of course,
and past
synchronized C++ svn posting. Since my own attention is
concentrated on more
under-the-hood matters right now, and I also think that you
could work
on this and improve it, I am sending you all the same material
Jeremy and Manuel
have received.
NOTE: This is _old_ _old_ code, POC code, but _working_
header code. Things
are there for the educational fun of it. The only addition
in that is that I
entered the GPL clause on top, as I have said so in previous
posts. I hope
you enjoy the rest and make whatever use you can of those
ideas. As with the
ones present in past posts here. You can find both as
attachment and direct
post here. It may be necessary for some to get rid of
word wrap in order to
read this, and in any case I advise you to read the xmlyze.h
header/implementation file from the attachment. For those
who actually read
the code, you will find some minor documentation in it. You
have what you
need for the xml parsing there.
Do what you please with that under GPL v.2.0 or up.
> --
> JH
George M.
//---- CODE POST FOLLOWS
// Multiple-inclusion protection.
// The #ifndef/#endif pair below closes immediately and therefore guards
// nothing on its own; #pragma once provides the actual protection, while
// XMLYZE_H_ stays defined for any code that tests for it.
#pragma once
#ifndef XMLYZE_H_
#define XMLYZE_H_
#endif /*XMLYZE_H_*/
#include <iostream>
#include <fstream>
#include <set>
#include <algorithm>
#include <vector>
#include <string>
#include <utility>
#include <map>
#include <iterator>
#include <deque>
#include <sstream>
// Version identifier as a string literal. The previous expansion (0.0.1;)
// was not a valid numeric token and carried a stray semicolon, so it could
// not be used in any expression.
#define XNZYME_VERSION "0.0.1"
/* EDIT: Copyright 2006 (c) George Makrydakis, under GPL 2.0
with NO
EXCEPTIONS WHATSOEVER
Copyright (c) George Makrydakis 2006
Licensed under the terms of the GNU GPL v 2.0 or up with no
exceptions.
For more information, please visit
http://www.gnu.org/copyleft/gpl.html.
*/
/*
Project Purpose:
xnzyme was originally a loosely defined project for
creating an automated
build system for delivering custom made GNU/Linux
distributions to the end user, much like another automated
linux from scratch
(alfs - http://www.linuxfromscratch.org) tool.
*/
/* Error / status codes. Enumerators take consecutive values starting at 0,
 * in the order listed; OK is the last one. */
enum xErrorType{
    ENTITY_RECURSION,
    ENTITY_NOT_FOUND,
    ENTITY_NOT_RESOLVED,
    FILE_NOT_FOUND,
    FILE_NOT_READWRITE,
    TAG_IREG_CLOSURE,
    TAG_IREG_OPENING,
    TAG_IREG_ATTRIBUTE,
    TAG_IREG_DTD,
    TAG_SEEK_NOT_FOUND,
    TAG_PREMATURE_STOP,
    XPATH_IREG_EXPRESSION,
    OK
};
/* Kinds of node. Enumerators take consecutive values starting at 0,
 * in the order listed. */
enum xNodeType{
    XML_NULL_ENTRY,
    XML_FOUL_ENTRY,
    XML_EMPTY,
    XML_EMPTY_ATTRIBUTES,
    XML_OPENING,
    XML_OPENING_ATTRIBUTES,
    XML_CLOSING,
    XML_COMMENT,
    XML_CDATA,
    XML_PLAIN_TEXT,
    XML_INSTRUCTION,
    XPATH_INSTRUCTION
};
/* Types of parsing. Enumerators take consecutive values starting at 0,
 * in the order listed. */
enum xParsingMode{
    PSTAT_STANDARD,
    PSTAT_MINIMAL,
    PSTAT_COMMENT,
    PSTAT_EXCLUDE_ATTRIBUTE,
    PSTAT_EXCLUDE_DOCTYPE,
    PSTAT_EXCLUDE_ENTITY,
    PSTAT_EXCLUDE_CDATA,
    PSTAT_EXCLUDE_PCDATA,
    PSTAT_EXCLUDE_COMMENT,
    PSTAT_EXCLUSIVE_DTD
};
/* Steps of entity parsing. Enumerators take consecutive values starting
 * at 0, in the order listed. */
enum xEntityParsingStep{
    START,
    STOP,
    NAME,
    RESOURCE,
    SYSTEMPUBLIC,
    NDATA
};
class xEntity{
    /*
     * Store for general (&name;) and parameter (%name;) entities.
     *
     * Each store maps an entity name to a (text, resolved) pair. The flag is
     * set once every nested reference inside the text has been expanded, at
     * which point the expanded text replaces the stored one and later
     * look-ups return it directly.
     *
     * NOTE: expanded texts are cached per entry; deleting an entity does not
     * touch the cached text of other entries that were already expanded.
     */
private:
    typedef std::map<std::string, std::pair<std::string, bool> > xEntityStore;

    xEntityStore genEntStore; // general entities,   referenced as &name;
    xEntityStore parEntStore; // parameter entities, referenced as %name;

    /*
     * Expands entity `name` from the general (general == true) or parameter
     * store, recursively expanding every nested &ref; / %ref; it contains.
     *
     * activeGen / activePar hold the names whose expansion is currently in
     * progress; meeting one of them again means the definitions are circular.
     * topLevel only selects which "not found" message is produced.
     *
     * Returns true with the expanded text in `result`, or false with an
     * error message in `result`. Nothing is cached on failure.
     */
    bool resolveEntity(bool general, const std::string& name, bool topLevel,
                       std::set<std::string>& activeGen,
                       std::set<std::string>& activePar,
                       std::string& result){
        xEntityStore& store = general ? genEntStore : parEntStore;
        std::set<std::string>& active = general ? activeGen : activePar;

        const xEntityStore::iterator entry = store.find(name);
        if(entry == store.end()){
            result = topLevel ? "ENTITY WAS NOT FOUND!" : "UNRESOLVED ENTITY!";
            return(false);
        }
        if(entry->second.second){ // already expanded on an earlier call
            result = entry->second.first;
            return(true);
        }

        std::string text = entry->second.first;
        active.insert(name);

        std::string::size_type refStart = text.find_first_of("&%");
        std::string::size_type refEnd = text.find_first_of("; \t\v\r\n", refStart);
        while(refStart != std::string::npos && refEnd != std::string::npos){
            if(text[refEnd] != ';'){
                result = "XML_PARSING_ERROR: missing ; from entity syntax!";
                return(false);
            }
            const bool refIsGeneral = (text[refStart] == '&');
            const std::string refName =
                text.substr(refStart + 1, refEnd - refStart - 1);
            const std::set<std::string>& refActive =
                refIsGeneral ? activeGen : activePar;
            if(refActive.find(refName) != refActive.end()){
                result = "RECURSIVE CALL MADE FOR:"
                       + std::string(refIsGeneral ? "&" : "%") + refName + ";";
                return(false);
            }
            std::string replacement;
            if(!resolveEntity(refIsGeneral, refName, false,
                              activeGen, activePar, replacement)){
                result = replacement; // propagate the nested error message
                return(false);
            }
            text.replace(refStart, refEnd - refStart + 1, replacement);
            // Continue after the inserted text: it is already fully expanded,
            // so a literal '&' or '%' it contains must not be parsed again.
            refStart = text.find_first_of("&%", refStart + replacement.size());
            refEnd = text.find_first_of("; \t\v\r\n", refStart);
        }

        active.erase(name);
        entry->second.first = text;
        entry->second.second = true;
        result = text;
        return(true);
    }

    /*
     * Resolves `myentity`. On entry sflag selects the store (true: general,
     * false: parameter); on return sflag tells whether resolution succeeded.
     * The return value is the expanded text, or an error message on failure.
     */
    std::string xmlEntitySolver(const std::string& myentity, bool& sflag){
        std::set<std::string> activeGen;
        std::set<std::string> activePar;
        std::string result;
        sflag = resolveEntity(sflag, myentity, true, activeGen, activePar, result);
        return(result);
    }
public:
    /* Registers a general entity; returns false if the name already exists. */
    bool addGenEntity(const std::string& entityName,
                      const std::string& entityResource){
        return(genEntStore.insert(
            std::make_pair(entityName,
                           std::make_pair(entityResource, false))).second);
    }
    /* Removes a general entity; returns false if it was not present. */
    bool delGenEntity(const std::string& entityName){
        return(genEntStore.erase(entityName) != 0);
    }
    /* Expands a general entity into resultString; on failure returns false
     * and resultString holds an error message. */
    bool getGenEntity(const std::string& entityName, std::string& resultString){
        bool flag = true; // true selects the general store
        resultString = xmlEntitySolver(entityName, flag);
        return(flag);
    }
    /* Registers a parameter entity; returns false if the name already exists. */
    bool addParEntity(const std::string& entityName,
                      const std::string& entityResource){
        return(parEntStore.insert(
            std::make_pair(entityName,
                           std::make_pair(entityResource, false))).second);
    }
    /* Removes a parameter entity; returns false if it was not present. */
    bool delParEntity(const std::string& entityName){
        return(parEntStore.erase(entityName) != 0);
    }
    /* Expands a parameter entity into resultString; on failure returns false
     * and resultString holds an error message. */
    bool getParEntity(const std::string& entityName, std::string& resultString){
        bool flag = false; // false selects the parameter store
        resultString = xmlEntitySolver(entityName, flag);
        return(flag);
    }
    /* True when neither store holds any entity. */
    bool empty() const{
        return(genEntStore.empty() && parEntStore.empty());
    }
};
class xSNode{
    /*
     * Tree node that keeps its children in two containers:
     *   - a std::multiset ordered by node name, used by seekSibling();
     *   - a vector of iterators into that multiset, in insertion order, used
     *     for access by position.
     * Using std::multiset costs logarithmic time per insertion/deletion; the
     * benefit is a structure that is always sorted for searching.
     *
     * A node owns its children: destroying it destroys the whole subtree, and
     * a node that is destroyed on its own first unlinks itself from its
     * parent. The parent passed to setParent() must therefore outlive the
     * node. Nodes are not copyable.
     *
     * NOTE: the node name is the multiset key; calling setName() on a node
     * that is already somebody's child does not reposition it in the set.
     */
private:
    struct xSNodePtr_LessThan{
        bool operator() (const xSNode* x, const xSNode* y) const{
            return x->xNodeName < y->xNodeName;
        }
    };
    typedef std::multiset<xSNode*, xSNodePtr_LessThan> xChildSet;
    typedef std::vector<xChildSet::iterator> xChildOrder;

    std::string xNodeName;                               // node name
    std::map<std::string, std::string>* xNodeAttributes; // attribute map, allocated on first use
    xChildSet* xNodeMultiSet;                            // children sorted by name, allocated on first use
    xChildOrder* xNodeMultiOrder;                        // children in insertion order, allocated on first use
    xSNode* xNodeParent;                                 // parent node, NULL for a root

    // Not copyable: a copy would share (and later double-delete) the containers.
    xSNode(const xSNode&);
    xSNode& operator=(const xSNode&);

    // Removes `child` from both child containers without deleting it.
    void unlinkChild(const xSNode* child){
        if(xNodeMultiSet == NULL || xNodeMultiOrder == NULL) return;
        for(xChildOrder::iterator slot = xNodeMultiOrder->begin();
            slot != xNodeMultiOrder->end(); ++slot){
            if(**slot == child){
                xNodeMultiSet->erase(*slot);
                xNodeMultiOrder->erase(slot);
                return;
            }
        }
    }
public:
    xSNode()
        : xNodeAttributes(NULL), xNodeMultiSet(NULL),
          xNodeMultiOrder(NULL), xNodeParent(NULL){
    }
    ~xSNode(){
        if(xNodeParent != NULL) xNodeParent->unlinkChild(this);
        if(xNodeMultiOrder != NULL){
            for(xChildOrder::size_type i = 0; i < xNodeMultiOrder->size(); ++i){
                xSNode* child = *(*xNodeMultiOrder)[i];
                // Detach first so the child does not edit our containers
                // while we are walking them.
                child->xNodeParent = NULL;
                delete child;
            }
        }
        delete xNodeAttributes; // deleting a null pointer is fine
        delete xNodeMultiSet;
        delete xNodeMultiOrder;
    }
    /* Appends a new child named nodeData; returns it, or NULL if an
     * allocation failed. */
    xSNode* addChildNode(const std::string& nodeData){
        if(xNodeMultiSet == NULL) xNodeMultiSet = new (std::nothrow) xChildSet;
        if(xNodeMultiOrder == NULL) xNodeMultiOrder = new (std::nothrow) xChildOrder;
        if(xNodeMultiSet == NULL || xNodeMultiOrder == NULL) return(NULL);
        xSNode* child = new (std::nothrow) xSNode;
        if(child == NULL) return(NULL);
        child->xNodeName = nodeData; // the sort key must be set before insertion
        child->xNodeParent = this;
        // begin() is only a hint; it may help with partially sorted input
        const xChildSet::iterator where =
            xNodeMultiSet->insert(xNodeMultiSet->begin(), child);
        xNodeMultiOrder->push_back(where);
        return(child);
    }
    /* Destroys the child at insertion position nodeRelPosition (0 is the
     * first child); later children move down by one. Returns false when the
     * position does not exist. */
    bool delChildNode(const unsigned& nodeRelPosition){
        if(xNodeMultiOrder == NULL || nodeRelPosition >= xNodeMultiOrder->size())
            return(false);
        const xChildSet::iterator where = (*xNodeMultiOrder)[nodeRelPosition];
        xSNode* child = *where; // read the pointer before the iterator is invalidated
        xNodeMultiSet->erase(where);
        xNodeMultiOrder->erase(xNodeMultiOrder->begin() + nodeRelPosition);
        child->xNodeParent = NULL; // already unlinked above
        delete child;
        return(true);
    }
    void setName(const std::string& nodeName){
        xNodeName = nodeName;
    }
    /* Child at insertion position nodeRelPosition, or NULL if there is none. */
    xSNode* getChildNode(const unsigned& nodeRelPosition){
        if(xNodeMultiOrder != NULL && nodeRelPosition < xNodeMultiOrder->size()){
            return(*(*xNodeMultiOrder)[nodeRelPosition]);
        }
        return(NULL);
    }
    void setParent(xSNode* myParent){
        xNodeParent = myParent;
    }
    xSNode* getParent(){
        return(xNodeParent);
    }
    std::string getName() const{
        return(xNodeName);
    }
    /* Adds an attribute; returns false if the name already exists or the
     * attribute map could not be allocated. */
    bool addAttribute(const std::string& attrName, const std::string& attrValue){
        if(xNodeAttributes == NULL){
            xNodeAttributes = new (std::nothrow) std::map<std::string, std::string>;
            if(xNodeAttributes == NULL) return(false);
        }
        return(xNodeAttributes->insert(std::make_pair(attrName, attrValue)).second);
    }
    /* Copies the value of attribute attrName into attrValue; returns false
     * (attrValue untouched) when the attribute does not exist. */
    bool getAttribute(const std::string& attrName, std::string& attrValue){
        if(xNodeAttributes == NULL) return(false);
        const std::map<std::string, std::string>::const_iterator mapIter =
            xNodeAttributes->find(attrName);
        if(mapIter == xNodeAttributes->end()) return(false);
        attrValue = mapIter->second;
        return(true);
    }
    // Nodes compare by name only.
    bool operator>(const xSNode& b) const{
        return(this->xNodeName > b.xNodeName);
    }
    bool operator<(const xSNode& b) const{
        return(this->xNodeName < b.xNodeName);
    }
    bool operator==(const xSNode& b) const{
        return(this->xNodeName == b.xNodeName);
    }
    /* Looks among the children of this node for one named `sibling`;
     * returns it, or NULL if there is none. */
    xSNode* seekSibling(const std::string& sibling){
        if(xNodeMultiSet == NULL) return(NULL);
        xSNode probe; // stack node used only as a search key
        probe.xNodeName = sibling;
        const xChildSet::iterator hit = xNodeMultiSet->find(&probe);
        if(hit == xNodeMultiSet->end()) return(NULL);
        return(*hit);
    }
};
class xSNodeBlock{
// the way we have it, xNode can now be included in
xSNodeBlock (good)
private:
xSNode rootnode;
xSNode* currnode;
xSNode* getNode(){
return(currnode);
};
public:
xSNodeBlock(){
currnode = &rootnode;
};
void setRoot(const std::string& rootname);
bool startNode(const std::string& cnode){
xSNode* node = currnode->addChildNode(cnode);
if(node != NULL){
node->setParent(currnode);
currnode = node; // shift to the new node!
return(true);
}
return(false);
};
bool closeNode(){
if(currnode->getParent() != NULL){
currnode = currnode->getParent();
return(true);
}
return(false);
};
bool deleteNode(){
xSNode* nodePtr = currnode->getParent();
if(nodePtr != NULL){
delete currnode;
currnode = nodePtr;
return(true);
}
return(false);
};
bool getNodeAttribute(const std::string attrName,
std::string& attrValue){
return(this->getNode()->getAttribute(attrName,
attrValue));
};
bool addNodeAttribute(const std::string attrName, const
std::string&
attrValue){
return(this->getNode()->addAttribute(attrName,
attrValue));
};
std::string getNodeName(){
return(currnode->getName());
};
};
class xPNode{
    /*
     * Plain version of the tree node: children are kept in a single vector in
     * insertion order, with no sorted container.
     *
     * A node owns its children: destroying it destroys the whole subtree, and
     * a node that is destroyed on its own first unlinks itself from its
     * parent. The parent passed to setParent() must therefore outlive the
     * node. Nodes are not copyable.
     */
private:
    struct xPNodePtr_LessThan_Order{
        bool operator() (const xPNode* x, const xPNode* y) const{
            return x->xNodeOrder < y->xNodeOrder;
        }
    };
    struct xPNodePtr_LessThan_Name{
        bool operator() (const xPNode* x, const xPNode* y) const{
            return x->xNodeName < y->xNodeName;
        }
    };
    std::string xNodeName;                               // node name
    std::map<std::string, std::string>* xNodeAttributes; // attribute map, allocated on first use
    std::vector<xPNode*>* xNodeVector;                   // children, allocated on first use
    xPNode* xNodeParent;                                 // parent node, NULL for a root
    unsigned xNodeOrder;                                 // order of insertion, as given by the caller

    // Not copyable: a copy would share (and later double-delete) the containers.
    xPNode(const xPNode&);
    xPNode& operator=(const xPNode&);

    // Removes `child` from the child vector without deleting it.
    void unlinkChild(xPNode* child){
        if(xNodeVector == NULL) return;
        const std::vector<xPNode*>::iterator slot =
            std::find(xNodeVector->begin(), xNodeVector->end(), child);
        if(slot != xNodeVector->end()) xNodeVector->erase(slot);
    }
public:
    xPNode()
        : xNodeAttributes(NULL), xNodeVector(NULL),
          xNodeParent(NULL), xNodeOrder(0){
    }
    ~xPNode(){
        if(xNodeParent != NULL) xNodeParent->unlinkChild(this);
        if(xNodeVector != NULL){
            for(std::vector<xPNode*>::size_type i = 0; i < xNodeVector->size(); ++i){
                xPNode* child = (*xNodeVector)[i];
                // Detach first so the child does not edit our vector while
                // we are walking it.
                child->xNodeParent = NULL;
                delete child;
            }
        }
        delete xNodeAttributes; // deleting a null pointer is fine
        delete xNodeVector;
    }
    /* Appends a new child named nodeData with insertion order orderNum;
     * returns it, or NULL if an allocation failed. */
    xPNode* addChildNode(const std::string& nodeData, const unsigned& orderNum){
        if(xNodeVector == NULL) xNodeVector = new (std::nothrow) std::vector<xPNode*>;
        if(xNodeVector == NULL) return(NULL);
        xPNode* child = new (std::nothrow) xPNode;
        if(child == NULL) return(NULL);
        child->xNodeName = nodeData;
        child->xNodeParent = this;
        child->xNodeOrder = orderNum;
        xNodeVector->push_back(child);
        return(child);
    }
    /* Destroys the child at position nodeRelPosition (0 is the first child);
     * returns false when the position does not exist. */
    bool delChildNode(const unsigned& nodeRelPosition){
        if(xNodeVector == NULL || nodeRelPosition >= xNodeVector->size())
            return(false);
        xPNode* child = (*xNodeVector)[nodeRelPosition];
        xNodeVector->erase(xNodeVector->begin() + nodeRelPosition);
        child->xNodeParent = NULL; // already unlinked above
        delete child;
        return(true);
    }
    void setName(const std::string& nodeName){
        xNodeName = nodeName;
    }
    /* Child at position nodeRelPosition, or NULL if there is none. */
    xPNode* getChildNode(const unsigned& nodeRelPosition){
        if(xNodeVector != NULL && nodeRelPosition < xNodeVector->size()){
            return((*xNodeVector)[nodeRelPosition]);
        }
        return(NULL);
    }
    void setParent(xPNode* myParent){
        xNodeParent = myParent;
    }
    xPNode* getParent(){
        return(xNodeParent);
    }
    std::string getName() const{
        return(xNodeName);
    }
    /* Adds an attribute; returns false if the name already exists or the
     * attribute map could not be allocated. */
    bool addAttribute(const std::string& attrName, const std::string& attrValue){
        if(xNodeAttributes == NULL){
            xNodeAttributes = new (std::nothrow) std::map<std::string, std::string>;
            if(xNodeAttributes == NULL) return(false);
        }
        return(xNodeAttributes->insert(std::make_pair(attrName, attrValue)).second);
    }
    /* Copies the value of attribute attrName into attrValue; returns false
     * (attrValue untouched) when the attribute does not exist. */
    bool getAttribute(const std::string& attrName, std::string& attrValue){
        if(xNodeAttributes == NULL) return(false);
        const std::map<std::string, std::string>::const_iterator mapIter =
            xNodeAttributes->find(attrName);
        if(mapIter == xNodeAttributes->end()) return(false);
        attrValue = mapIter->second;
        return(true);
    }
    // Nodes compare by name only.
    bool operator>(const xPNode& b) const{
        return(this->xNodeName > b.xNodeName);
    }
    bool operator<(const xPNode& b) const{
        return(this->xNodeName < b.xNodeName);
    }
    bool operator==(const xPNode& b) const{
        return(this->xNodeName == b.xNodeName);
    }
    /*
     * Looks among the children of this node for one named `sibling`; returns
     * the first match in the current child order, or NULL if there is none.
     *
     * This is a linear scan. It deliberately does not sort the child vector:
     * sorting on every look-up costs more than the scan it would replace and
     * silently changes the positions used by getChildNode()/delChildNode().
     * Callers that want the children ordered by name call sortit() themselves.
     */
    xPNode* seekSibling(const std::string& sibling){
        if(xNodeVector == NULL) return(NULL);
        for(std::vector<xPNode*>::size_type i = 0; i < xNodeVector->size(); ++i){
            if((*xNodeVector)[i]->xNodeName == sibling) return((*xNodeVector)[i]);
        }
        return(NULL);
    }
    /* Number of children. */
    unsigned getAll(){
        return(xNodeVector == NULL ? 0u : static_cast<unsigned>(xNodeVector->size()));
    }
    /* Sorts the children by name, ascending; this changes child positions. */
    void sortit(){
        if(xNodeVector == NULL) return;
        std::sort(xNodeVector->begin(), xNodeVector->end(), xPNodePtr_LessThan_Name());
    }
};
class xPNodeBlock{
// this is going to be our structure of choice, because of
its remarkable
performance
private:
xPNode rootnode;
xPNode* currnode;
xPNode* getNode(){
return(currnode);
};
public:
xPNodeBlock(){
currnode = &rootnode;
};
bool startNode(const std::string& cnode){
xPNode* node = currnode->addChildNode(cnode, 0);
if(node != NULL){
node->setParent(currnode);
currnode = node; // shift to the new node!
return(true);
}
return(false);
};
bool closeNode(){
if(currnode->getParent() != NULL){
currnode = currnode->getParent();
return(true);
}
return(false);
};
bool deleteNode(){
xPNode* nodePtr = currnode->getParent();
if(nodePtr != NULL){
delete currnode;
currnode = nodePtr;
return(true);
}
return(false);
};
bool getNodeAttribute(const std::string attrName,
std::string& attrValue){
return(this->getNode()->getAttribute(attrName,
attrValue));
};
bool addNodeAttribute(const std::string attrName, const
std::string&
attrValue){
return(this->getNode()->addAttribute(attrName,
attrValue));
};
std::string getNodeName(){
return(currnode->getName());
};
void doit(){
currnode->sortit();
};
};
class xMStringFile{
    /*
     * A file loaded completely into a std::string, plus thin wrappers around
     * the std::string search functions used during parsing.
     *
     * Typical use: open(), fload(), then operator[] / find*() / substr().
     * fcheck() returns true once an open() or fdump() has failed; a later
     * successful open() clears that state again.
     */
private:
    std::ifstream fileHandle;
    std::string charBuffer; // held by value: it can never be a null pointer
    bool errors;
public:
    xMStringFile() : errors(false){
    }
    /* True when the last open() failed or an fdump() has failed since. */
    bool fcheck() const{
        return(errors);
    }
    /* Opens fileid for reading. Returns false if a file is already open
     * (state unchanged) or if the file cannot be opened (error state set). */
    bool open(const char* fileid){
        if(fileHandle.is_open()) return(false);
        fileHandle.clear(); // drop failure bits left by an earlier attempt
        fileHandle.open(fileid);
        errors = !fileHandle;
        return(!errors);
    }
    /* Reads everything that remains in the open file into the buffer.
     * Returns false, leaving the buffer untouched, when the object is in
     * the error state or no file is open. */
    bool fload(){
        if(errors || !fileHandle.is_open()) return(false);
        std::ostringstream buffStream;
        buffStream << fileHandle.rdbuf();
        charBuffer = buffStream.str();
        return(true);
    }
    /* Writes the buffer to fileName; on failure sets the error state and
     * returns false. */
    bool fdump(const char* fileName){
        std::ofstream writeFile(fileName);
        if(writeFile){
            writeFile << charBuffer;
            return(true);
        }
        errors = true;
        return(false);
    }
    void fclose(){
        fileHandle.close();
    }
    bool fis_open(){
        return(fileHandle.is_open());
    }
    /*
     * std::string-like functions for the operations used in our setting.
     * What follows is mostly wrapper functions around std::string (this will
     * change); they forward to the buffer unchanged:
     *   find, find_first_of, find_first_not_of, find_last_not_of,
     *   find_last_of, substr
     */
    /* Character at position number; no bounds check is performed. */
    char operator [](unsigned int number) const{
        return charBuffer[number];
    }
    std::string::size_type find(const std::string& str,
                                std::string::size_type index) const{
        return(charBuffer.find(str, index));
    }
    std::string::size_type find(const char* str,
                                std::string::size_type index) const{
        return(charBuffer.find(str, index));
    }
    std::string::size_type find(const char* str, std::string::size_type index,
                                std::string::size_type length) const{
        return(charBuffer.find(str, index, length));
    }
    std::string::size_type find(char ch, std::string::size_type index) const{
        return(charBuffer.find(ch, index));
    }
    std::string::size_type find_first_of(const std::string& str,
                                         std::string::size_type index = 0) const{
        return(charBuffer.find_first_of(str, index));
    }
    std::string::size_type find_first_of(const char* str,
                                         std::string::size_type index = 0) const{
        return(charBuffer.find_first_of(str, index));
    }
    std::string::size_type find_first_of(const char* str,
                                         std::string::size_type index,
                                         std::string::size_type num) const{
        return(charBuffer.find_first_of(str, index, num));
    }
    std::string::size_type find_first_of(char ch,
                                         std::string::size_type index = 0) const{
        return(charBuffer.find_first_of(ch, index));
    }
    std::string::size_type find_first_not_of(const std::string& str,
                                             std::string::size_type index = 0) const{
        return(charBuffer.find_first_not_of(str, index));
    }
    std::string::size_type find_first_not_of(const char* str,
                                             std::string::size_type index = 0) const{
        return(charBuffer.find_first_not_of(str, index));
    }
    std::string::size_type find_first_not_of(const char* str,
                                             std::string::size_type index,
                                             std::string::size_type num) const{
        return(charBuffer.find_first_not_of(str, index, num));
    }
    std::string::size_type find_first_not_of(char ch,
                                             std::string::size_type index = 0) const{
        return(charBuffer.find_first_not_of(ch, index));
    }
    std::string::size_type find_last_not_of(const std::string& str,
                                            std::string::size_type index = std::string::npos) const{
        return(charBuffer.find_last_not_of(str, index));
    }
    std::string::size_type find_last_not_of(const char* str,
                                            std::string::size_type index = std::string::npos) const{
        return(charBuffer.find_last_not_of(str, index));
    }
    std::string::size_type find_last_not_of(const char* str,
                                            std::string::size_type index,
                                            std::string::size_type num) const{
        return(charBuffer.find_last_not_of(str, index, num));
    }
    std::string::size_type find_last_not_of(char ch,
                                            std::string::size_type index = std::string::npos) const{
        return(charBuffer.find_last_not_of(ch, index));
    }
    std::string::size_type find_last_of(const std::string& str,
                                        std::string::size_type index = std::string::npos) const{
        return(charBuffer.find_last_of(str, index));
    }
    std::string::size_type find_last_of(const char* str,
                                        std::string::size_type index = std::string::npos) const{
        return(charBuffer.find_last_of(str, index));
    }
    std::string::size_type find_last_of(const char* str,
                                        std::string::size_type index,
                                        std::string::size_type num) const{
        return(charBuffer.find_last_of(str, index, num));
    }
    std::string::size_type find_last_of(char ch,
                                        std::string::size_type index = std::string::npos) const{
        return(charBuffer.find_last_of(ch, index));
    }
    std::string substr(std::string::size_type index,
                       std::string::size_type num = std::string::npos) const{
        return(charBuffer.substr(index, num));
    }
};
class xmlObj{
    /*
     * Minimal XML scanner over a file held in an xMStringFile.
     * lastIndex is the position currently being examined; postIndex is a
     * scratch position used by xmlMainParser().
     */
private:
    std::string::size_type lastIndex;
    std::string::size_type postIndex;
    xMStringFile mystring;
public:
    xmlObj() : lastIndex(0), postIndex(0){
    }
    /* Opens filexml and loads its contents; failures are not reported here. */
    void load(const char* filexml){
        mystring.open(filexml);
        mystring.fload();
    }
    bool xmlMainParser();
    /*
     * Walks the loaded text. Every run of characters that does not start
     * with '<' is printed as data; every '<' is handed to xmlMainParser().
     * Returns true when the end of the text is reached, false as soon as
     * xmlMainParser() rejects a construct.
     */
    bool parse(){
        const char* const whitespace = " \t\v\r\n";
        // parse for every < character met.
        lastIndex = mystring.find_first_not_of(whitespace);
        while(lastIndex != std::string::npos){
            if(mystring[lastIndex] != '<'){
                const std::string::size_type dataStart = lastIndex;
                lastIndex = mystring.find("<", lastIndex);
                std::cout << "DATA STARTS!\n "
                          << mystring.substr(dataStart, lastIndex - dataStart)
                          << "\nDATA ENDS! " << std::endl;
            } else if(this->xmlMainParser()) {
                // npos + 1 would wrap around to 0 and restart the scan
                if(lastIndex == std::string::npos) break;
                lastIndex = mystring.find_first_not_of(whitespace, lastIndex + 1);
            } else {
                std::cout << "unexpected behaviour" << std::endl;
                return(false); // lastIndex did not advance: retrying would loop forever
            }
        }
        return(true);
    }
};
bool xmlObj: mlMai
nParser(){
// SLIM IT DOWN!
postIndex = mystring.find_first_of("
ntvr><&%!;'"=", lastIndex + 1);
std::string attribute_name;
std::string attribute_value;
std::string myname;
std::string tipstring = " ntvr"; // works!
bool doneflag = false;
if(postIndex != std::string::npos){
// it is supposed that: mystring[lastIndex] == '<'
since we checked it
before calling this routine!
switch (mystring[postIndex]){
case '&':case '?': case '%':
std::cout << "feature not supported yet
:" << mystring[postIndex] <<
std::endl;
return(false);
case '!':
if(mystring.find("--", postIndex) ==
postIndex + 1){
// this is a comment, get rid of it
lastIndex = mystring.find("-->",
postIndex + 3);
std::cout << "xnzyme:[report]> comment
statement" << std::endl;
return(true);
}
std::cout << "feature not supported yet:
!" << std::endl;
return(false);
case 't': case 'n': case 'v': case 'r': case ' ':
myname = mystring.substr(lastIndex + 1, postIndex -
lastIndex - 1);
std::cout << "OPENING ELEMENT:" +
myname << std::endl;
lastIndex = mystring.find_first_not_of(tipstring,
postIndex);
postIndex = mystring.find_first_of("
ntvr=><"'%&", lastIndex);
if(postIndex != std::string::npos){ // this one checks
the <element >
validity...
if(mystring[lastIndex] == '>' ||
mystring[lastIndex] == '<'){ // what if
it is undefined...
std::cout <<" illegal syntax after tag
definition" << std::endl;
return(false);
} else if(mystring[lastIndex] == '/' &&
mystring[lastIndex + 1] == '>'){
// probably an empty ellement
std::cout << "Empty element again"
<< std::endl;
return(true);
}
}
while(postIndex != std::string::npos){ // attribute
loop
unsigned checkout = postIndex - lastIndex;
if(!checkout){ // sweetness
std::cout << "error! no name
specified!" << std::endl;
return(false);
}
attribute_name = mystring.substr(lastIndex, checkout);
// ok name is
valid
lastIndex = mystring.find_first_not_of(tipstring,
postIndex);
postIndex = mystring.find("=", lastIndex);
// if this does not exist
then we are busted
if(postIndex != std::string::npos && lastIndex
== postIndex){
postIndex++;
lastIndex = mystring.find_first_not_of(tipstring,
postIndex);
postIndex = mystring.find(""",
lastIndex);
if(postIndex != std::string::npos){
if(lastIndex != postIndex){
std::cout <<" error! no quote
specified!" << std::endl; // forgot the
quote, yet busted!
return(false);
}
postIndex = mystring.find(""",
lastIndex + 1);
if(postIndex != std::string::npos){
postIndex++;
attribute_value = mystring.substr(lastIndex,
postIndex - lastIndex);
lastIndex = mystring.find_first_not_of(tipstring,
postIndex);
if(lastIndex != std::string::npos){
if(mystring[lastIndex] == '>'){
std::cout << "tATTRN: " +
attribute_name << std::endl;
std::cout << "tATTRV: " +
attribute_value << std::endl;
std::cout << "ELEMENT TYPE IS:
PLAIN"<< std::endl;
return(true);
} else if(mystring.find("/>",
lastIndex) == lastIndex){
std::cout << "tATTRN: " +
attribute_name << std::endl;
std::cout << "tATTRV: " +
attribute_value << std::endl;;
std::cout << "ELEMENT TYPE IS:
EMPTY"<< std::endl;
lastIndex++;
return(true);
}// fix here too
}
else std::cout << "fscking error!"
<< std::endl;
}
} else return(false); // quote did not exist!
}
else return(false); // the = sign did not exist
std::cout << "tATTRN: " +
attribute_name << std::endl;
std::cout << "tATTRV: " +
attribute_value << std::endl;
postIndex = mystring.find_first_of("
ntvr="'%&;()",
lastIndex); //check for attrname to be correct, see above
how.
}
case '>':
myname = mystring.substr(lastIndex + 1, postIndex -
lastIndex - 1);
if(myname.empty()){
std::cout << "error in syntax"
<< std::endl;
return(false);
}
if(myname[0] =='/'){
if(myname.size() == 1){
std::cout << "mistake! syntax error!
orphan close !" << std::endl;
return(false);
}
std::cout << "CLOSING ELEMENT: " +
myname.substr(1) << std::endl; //
</element>
return(true);
}
if(mystring[postIndex - 1] == '/'){
std::cout << "ELEMENT TYPE IS: EMPTY:
"+ myname.erase(postIndex -
lastIndex - 2) << std::endl;
} else if (mystring[postIndex] == '>') {
std::cout << "ELEMENT TYPE IS: OPENING,
PLAIN" + myname << std::endl; //
<element>
}
};
doneflag = true;
lastIndex = mystring.find_first_not_of("
ntvr", postIndex);
postIndex = mystring.find_first_of("
ntvr="'%&;()", lastIndex);
} else {
std::cout << "null magic" <<
std::endl; // postIndex is out of range...
}
return(doneflag);
};
// -- CODE ENDS
--
http://linuxfromscratch.org/mailman/listinfo/alfs-discuss
FAQ: http://www.linuxfromscratch.org/faq/
Unsubscribe: See the above information page
|