Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members  

muo_convert.cc

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2001 Karsten Reincke <karsten.reincke@gnukose.org>
00003  *  
00004  * This program is free software; you can redistribute it and/or modify
00005  * it under the terms of the GNU General Public License as published by
00006  * the Free Software Foundation; either version 2 of the License, or
00007  * (at your option) any later version.
00008  * 
00009  * This program is distributed in the hope that it will be useful,
00010  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00011  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00012  * GNU General Public License for more details.
00013  * 
00014  * You should have received a copy of the GNU General Public License
00015  * along with this program; if not, write to the Free Software 
00016  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00017  *  
00018  * file <muo_convert.cc> version <#1.3.1#> of project <MrProjext>
00019  */   
00020 
00027 #include <iostream>
00028 #include <string>
00029 #include <cstdlib>
00030 #include <cstdio>
00031 #include "muo_convert.h"
00032 
00033 /* &&& (1) local preprocessor-defines &&&&&&&&&&&&&&&&&&&&&&& */
00034 
00035 /* &&& (2) &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& */
00036 /* &&& (2) definitions of static global variables &&&&&&&&&&& */
00037 
00038 /* &&& (3) &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& */
00039 /* &&&     methods for class Converter &&&&&&&&&&&&&&&&&&&&&&&& */
00040 
00041 /* --- (3.A) - public ----------------------------------------- */
00042 /* :-: (3.A.1) public constructors, inits & operators :-:-:-:-: */
00048 krmuo::Converter::Converter()
00049 : mUnConvertedToken(""),
00050   mConvertedToken(""),
00051   mConvertedString(""),
00052   mUtf8String(""),
00053   mTokenMap(NULL)
00054 {
00055   mUtf8Sign[0]=0;
00056 }
00057 
00061 krmuo::Converter::~Converter()
00062 {
00063 }
00064 
00065 /* :-: (3.A.2) public getter -:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-: */
00066 /* :-: (3.A.3) public setter -:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-: */
00067 /* :-: (3.A.4) public others -:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-: */
00068 
00080 const string krmuo::Converter::decode(const string& pUnDecodedString)
00081 {
00082   assert(mTokenMap);
00083   mUnConvertedStringEnd=pUnDecodedString.end();
00084   mUnConvertedStringIterator=pUnDecodedString.begin();
00085   mConvertedString="";
00086   unsigned char sign;
00087   while(getNextUnDecodedToken(mUnConvertedToken))
00088   {
00089     if (isDecodableEmbeddedUtf8Token(mUnConvertedToken,sign))
00090       mConvertedString+=sign;
00091     else if (mTokenMap->isDecodableToken(mUnConvertedToken))
00092       mConvertedString+=mTokenMap->decodeToken(mUnConvertedToken);
00093     else
00094       mConvertedString+=mUnConvertedToken;
00095   }
00096   return mConvertedString;
00097 }
00098 
00109 const string krmuo::Converter::encode(const string& pUnEncodedString)
00110 {
00111   assert(mTokenMap);
00112   mUnConvertedStringIterator=pUnEncodedString.begin();
00113   mConvertedString="";
00114   while(mUnConvertedStringIterator!=pUnEncodedString.end())
00115   {
00116     if (mTokenMap->isEncodableSign((unsigned char)*mUnConvertedStringIterator))
00117       mConvertedString+=mTokenMap->encodeSign((unsigned char)*mUnConvertedStringIterator);
00118     else if (isUtf8EncodableSign((unsigned char)*mUnConvertedStringIterator,mUtf8String))
00119       mConvertedString+=mUtf8String;
00120     else
00121       mConvertedString+=(unsigned char)*mUnConvertedStringIterator;
00122     mUnConvertedStringIterator++;
00123   }
00124   return mConvertedString;
00125 }
00126 
00133 bool krmuo::Converter::isDecodableEmbeddedUtf8Token
00134 ( const string& pUtf8Token,
00135   unsigned char& pSign
00136 )
00137 {
00138   if (pUtf8Token.size()<3)
00139     return false;
00140     
00141   string::const_iterator tokenSign=pUtf8Token.begin(); 
00142   if (((unsigned char)* tokenSign)  != ((unsigned char)'&'))
00143     return false;
00144 
00145   tokenSign++;
00146   if (((unsigned char) *tokenSign) != ((unsigned char)'#'))
00147     return false;
00148                
00149   tokenSign++;
00150   // *tokensign == 'x' | 'X' | 'o' | 'O' | '1' ... '0'
00151     
00152   enum NumberStil { UNKNOWN, HEX, OCTAL, DECIMAL } numberStil=UNKNOWN;
00153     
00154   if (  ((unsigned char) toupper(*tokenSign)) == (unsigned char)'X' )
00155   {
00156     numberStil=HEX;
00157     tokenSign++;
00158   }
00159   else if (((unsigned char) toupper(*tokenSign)) == (unsigned char)'O' )
00160   {
00161     numberStil=OCTAL;
00162     tokenSign++;      
00163   }
00164   else if ( isdigit(*tokenSign) )
00165   {
00166     numberStil=DECIMAL;
00167   }
00168     
00169   if (numberStil==UNKNOWN)
00170     return false;
00171 
00172   string utf8DecodedToken="";
00173   while(    (tokenSign !=pUtf8Token.end())
00174         &&  (   ((unsigned char)*tokenSign) 
00175             !=  ((unsigned char)';')
00176             ) 
00177         ) 
00178   {    
00179     utf8DecodedToken+=*tokenSign;
00180     tokenSign++;
00181   }
00182   
00183   unsigned int signValue=0;
00184   switch(numberStil)
00185   {
00186     case HEX:
00187       sscanf(utf8DecodedToken.c_str(),"%x",&signValue);
00188         break;
00189     case OCTAL:
00190       sscanf(utf8DecodedToken.c_str(),"%o",&signValue);
00191         break;
00192     default:
00193       sscanf(utf8DecodedToken.c_str(),"%u",&signValue);
00194   }
00195   pSign=(unsigned char) signValue;
00196   return true;  
00197 }    
00198 
00206 bool krmuo::Converter::getNextUnDecodedToken(string& pUnDecodedToken)
00207 {
00208   // are there still candidates for decoding?
00209   if (mUnConvertedStringIterator==mUnConvertedStringEnd)
00210     return false;
00211     
00212   pUnDecodedToken="";
00213   
00214   // take the start position of the next candidate
00215   string::const_iterator
00216     unDecodedParseIterator=mUnConvertedStringIterator;
00217   
00218   // is the candidate more than a simple token
00219   if (  ( (unsigned char)*unDecodedParseIterator )
00220      == ( (unsigned char) '&')
00221      )
00222   {
00223     bool doParse=true;  
00224     
00225     while(doParse)
00226     {
00227       // did we run into uncorrect html
00228       if (  (unDecodedParseIterator==mUnConvertedStringEnd)
00229          ||
00230             (   (unsigned char)*unDecodedParseIterator 
00231             <=  (unsigned char)' '
00232           ) )
00233       {
00234         
00235         // ... then eval the first sign (=&) as normal sign
00236         unDecodedParseIterator=mUnConvertedStringIterator;
00237         pUnDecodedToken="";
00238         
00239         // each last sign of an html-tag will 
00240         // be overtaken at the end of this method 
00241         // and in this case the last sign  is the first sign
00242         doParse=false;
00243       }
00244       else
00245       {
00246         // have we reached the correct end of an html-tag?
00247         if  (   (unsigned char)*unDecodedParseIterator 
00248             ==  (unsigned char)';'
00249             )
00250         {
00251           // each last sign of an html-sign-tag will 
00252           // be overtaken at the end of this method 
00253           doParse=false;
00254         }
00255         else
00256         {
00257           // overtake the signtagsign into the token-string
00258           // and test the next signtagsign
00259           pUnDecodedToken+=*unDecodedParseIterator;
00260           unDecodedParseIterator++;
00261         }        
00262       }
00263     }    
00264     mUnConvertedStringIterator=unDecodedParseIterator;
00265   }
00266   
00267   // take over the last still not overtaken sign of the token:
00268   // if the token is only a simple the sign this (first) sign
00269   // is also the last sign
00270   pUnDecodedToken+=*mUnConvertedStringIterator;
00271   mUnConvertedStringIterator++;
00272   return true;
00273 }
00274 
00281 bool krmuo::Converter::isUtf8EncodableSign(const unsigned char pSign, string& pUtf8Token)
00282 {
00283   if (pSign < 0x80)
00284     return false;
00285   sprintf(mUtf8Sign,"&#%02d;", pSign);
00286   pUtf8Token=mUtf8Sign;
00287   return true;
00288 }
00289 
00290 /* &&& (4) &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& */
00291 /* &&&     methods for class HtmlConverter &&&&&&&&&&&&&&&&&&&& */
00292 /* --- (4.A) - public ----------------------------------------- */
00293 /* :-: (4.A.1) public constructors, inits & operators :-:-:-:-: */
00300 krmuo::HtmlConverter::HtmlConverter()
00301 {
00302   mTokenMap=&mHtmlCodeTokenMap; 
00303 }
00304 
00308 krmuo::HtmlConverter::~HtmlConverter()
00309 {
00310 }
00311 
00312 /* &&& (5) &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& */
00313 /* &&&     methods for class XmlConverter &&&&&&&&&&&&&&&&&&&& */
00314 /* --- (5.A) - public ----------------------------------------- */
00315 /* :-: (5.A.1) public constructors, inits & operators :-:-:-:-: */
00322 krmuo::XmlConverter::XmlConverter()
00323 {
00324   mTokenMap=&mXmlCodeTokenMap; 
00325 }
00326 
00330 krmuo::XmlConverter::~XmlConverter()
00331 {
00332 }
00333 
00334 /* &&& (6) &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& */
00335 /* &&&     methods for class UrlConverter &&&&&&&&&&&&&&&&&&&& */
00336 /* --- (6.A) - public ----------------------------------------- */
00337 /* :-: (6.A.1) public constructors, inits & operators :-:-:-:-: */
00347 krmuo::UrlConverter::UrlConverter()
00348 {
00349   mTokenMap=&mUrlCodeTokenMap; 
00350 }
00351 
00355 krmuo::UrlConverter::~UrlConverter()
00356 {
00357 }
00358 /* :-: (6.A.4) public others -:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-:-: */
00359 
00371 const string krmuo::UrlConverter::decode(const string& pUnDecodedString)
00372 {
00373 
00374   mUnConvertedStringEnd=pUnDecodedString.end();
00375   mUnConvertedStringIterator=pUnDecodedString.begin();
00376   mConvertedString="";
00377   unsigned char sign;
00378   while(getNextUnDecodedToken(mUnConvertedToken))
00379   {
00380     
00381     if (isDecodableEmbeddedUtf8Token(mUnConvertedToken,sign))
00382       mConvertedString+=sign;
00383     else if (mUrlCodeTokenMap.isDecodableToken(mUnConvertedToken))
00384       mConvertedString+=mUrlCodeTokenMap.decodeToken(mUnConvertedToken);
00385     else
00386       mConvertedString+=mUnConvertedToken;
00387   }  
00388   return mConvertedString;
00389 }
00390 
00401 const string krmuo::UrlConverter::encode(const string& pUnEncodedString)
00402 {
00403   mUnConvertedStringIterator=pUnEncodedString.begin();
00404   mConvertedString="";
00405   while(mUnConvertedStringIterator!=pUnEncodedString.end())
00406   {
00407     if (mUrlCodeTokenMap.isEncodableSign((unsigned char)*mUnConvertedStringIterator))
00408       mConvertedString+=mUrlCodeTokenMap.encodeSign((unsigned char)*mUnConvertedStringIterator);
00409     else if (isUtf8EncodableSign((unsigned char)*mUnConvertedStringIterator,mUtf8String))
00410       mConvertedString+=mUtf8String;
00411     else
00412       mConvertedString+=(unsigned char)*mUnConvertedStringIterator;
00413     mUnConvertedStringIterator++;
00414   }
00415   return mConvertedString;
00416 }
00417 
00429 bool krmuo::UrlConverter::getNextUnDecodedToken(string& pUnDecodedToken)
00430 {
00431   // are there still candidates for decoding?
00432   if (mUnConvertedStringIterator==mUnConvertedStringEnd)
00433     return false;
00434     
00435   pUnDecodedToken="";
00436   
00437   // take the start position of the next candidate
00438   string::const_iterator
00439     unDecodedParseIterator=mUnConvertedStringIterator;
00440   
00441   // is the candidate more than a simple token
00442   if (  ( (unsigned char)*unDecodedParseIterator )
00443      == ( (unsigned char) '%')
00444      )
00445   {
00446     unDecodedParseIterator++;
00447     if (unDecodedParseIterator==mUnConvertedStringEnd)
00448       unDecodedParseIterator=mUnConvertedStringIterator;
00449     else
00450     {
00451       pUnDecodedToken="%";
00452       pUnDecodedToken+=*unDecodedParseIterator;
00453       unDecodedParseIterator++;
00454       if (unDecodedParseIterator==mUnConvertedStringEnd)
00455         unDecodedParseIterator=mUnConvertedStringIterator;
00456     }
00457   }
00458   mUnConvertedStringIterator=unDecodedParseIterator;
00459   
00460   // take over the last still not overtaken sign of the token:
00461   // if the token is only a simple the sign this (first) sign
00462   // is also the last sign
00463   pUnDecodedToken+=*mUnConvertedStringIterator;
00464   mUnConvertedStringIterator++;
00465   return true;
00466 }
00467 
00474 bool krmuo::UrlConverter::isUtf8EncodableSign(const unsigned char pSign, string& pUtf8Token)
00475 {
00476   if (pSign < 0x80)
00477     return false;
00478   sprintf(mUtf8Sign,"%c%02x",'%',(unsigned int)pSign);     
00479   pUtf8Token=mUtf8Sign;
00480   return true;
00481 }
00482 
00489 bool krmuo::UrlConverter::isDecodableEmbeddedUtf8Token
00490 ( const string& pUtf8Token,
00491   unsigned char& pSign
00492 )
00493 {
00494   if (pUtf8Token.size()<3)
00495     return false;
00496     
00497   string::const_iterator tokenSign=pUtf8Token.begin(); 
00498   if (((unsigned char)* tokenSign)  != ((unsigned char)'%'))
00499     return false;
00500 
00501   tokenSign++;
00502 
00503   string utf8DecodedToken="";
00504   while(tokenSign !=pUtf8Token.end())
00505   {    
00506     utf8DecodedToken+=*tokenSign;
00507     tokenSign++;
00508   }
00509   
00510   unsigned int signValue=0;
00511   sscanf(utf8DecodedToken.c_str(),"%x",&signValue);
00512   pSign=(unsigned char) signValue;
00513   return true;  
00514 }    

Generated on Sun Mar 16 10:58:36 2003 for MRPROJEXT by doxygen1.2.17