Main Page   Class Hierarchy   Alphabetical List   Compound List   Examples  

qp.h

00001 /***************************************************************************
00002     copyright            : (C) 2002-2005 by Stefano Barbato
00003     email                : [email protected]
00004 
00005     $Id: qp_8h-source.html,v 1.4 2006-03-12 12:28:31 tat Exp $
00006  ***************************************************************************/
00007 
00008 /***************************************************************************
00009  *                                                                         *
00010  *   This program is free software; you can redistribute it and/or modify  *
00011  *   it under the terms of the GNU General Public License as published by  *
00012  *   the Free Software Foundation; either version 2 of the License, or     *
00013  *   (at your option) any later version.                                   *
00014  *                                                                         *
00015  ***************************************************************************/
00016 #ifndef _MIMETIC_CODEC_QP_H_
00017 #define _MIMETIC_CODEC_QP_H_
00018 #include <iostream>
00019 #include <string>
00020 #include <sstream>
00021 #include <cassert>
00022 #include <mimetic/libconfig.h>
00023 #include <mimetic/utils.h>
00024 #include <mimetic/circular_buffer.h>
00025 #include <mimetic/codec/codec_base.h>
00026 #include <mimetic/codec/codec_chain.h>
00027 
00028 namespace mimetic
00029 {
00030 
00031 class QP
00032 {
00033     friend class test_qp;
00034     enum { LF = 0xA, CR = 0xD, NL = LF, TAB = 9, SP = 32 };
00035     enum { default_maxlen = 76 };
00036     enum { 
00037         printable,  /* print as-is */
00038         tab,        /* print if !isBinary */
00039         sp,         /* ' ' */
00040         newline,    /* cr or lf; encode if isBinary*/    
00041         binary,     /* rest of the ascii map */
00042         unsafe      /* "!\"#$@[]\\^`{}|~" */
00043     };
00044     static char sTb[256];
00045 
00046 public:
00047 
00048 /// quoted-printable encoder
00049 /*!
00050 
00051  \sa encode decode
00052  */
00053 class Encoder: public buffered_codec, public chainable_codec<Encoder>
00054 {
00055     enum { laBufSz = 5 }; // look-ahead buffer
00056     size_t m_pos, m_maxlen;
00057     bool m_binary;
00058     circular_buffer<char_type> m_cbuf;
00059 
00060     template<typename OutIt>
00061     void hardLineBrk(OutIt& out)
00062     {
00063         *out = NL; ++out;
00064         m_pos = 1;
00065     }
00066     template<typename OutIt>
00067     void softLineBrk(OutIt& out)
00068     {
00069         *out = '='; ++out;
00070         hardLineBrk(out);
00071     }
00072     template<typename OutIt>
00073     void write(char_type ch, OutIt& out)
00074     {
00075         bool is_last_ch = m_cbuf.empty();
00076         if(!is_last_ch && m_pos == m_maxlen)
00077             softLineBrk(out);
00078         *out = ch; ++out;
00079         m_pos++;
00080     }
00081     template<typename OutIt>
00082     void writeHex(char_type ch, OutIt& out)
00083     {
00084         static char_type hexc[] =
00085         { 
00086             '0', '1', '2', '3', '4', '5' ,'6', '7', '8', '9',
00087             'A', 'B', 'C', 'D', 'E', 'F'
00088         };        
00089         bool is_last_ch = m_cbuf.empty();
00090         if(m_pos + (is_last_ch ? 1 : 2) >= m_maxlen)
00091             softLineBrk(out);
00092         // write out =HH
00093         *out = '='; ++out;
00094         *out = hexc[ch >> 4]; ++out;
00095         *out = hexc[ch & 0xf]; ++out;
00096         m_pos += 3;
00097     } 
00098     template<typename OutIt>
00099     void encodeChar(char_type c, OutIt& out)
00100     {
00101         int cnt = m_cbuf.count();
00102         switch(sTb[c])
00103         {
00104         case printable:
00105             if(m_pos == 1)
00106             {
00107                 switch(c)
00108                 {
00109                 case 'F': // hex enc on "^From .*"
00110                     if(cnt>=4 && m_cbuf.compare(0,4,"rom "))
00111                     {
00112                         writeHex(c,out);
00113                         return;
00114                     }
00115                     break;
00116                 case '.': // hex encode if "^.[\r\n]" or on eof
00117                     if(!cnt || sTb[ m_cbuf[0] ] == newline)
00118                     {
00119                         writeHex(c,out);
00120                         return;
00121                     }
00122                     break;
00123                 }
00124             } 
00125             write(c,out);
00126             break;
00127         case tab:
00128         case sp:
00129             // on binary encoding, or last input ch or newline
00130             if(m_binary || !cnt || sTb[ m_cbuf[0] ] == newline)
00131                 writeHex(c,out);
00132             else
00133                 write(c,out);
00134             break;
00135         case newline:
00136             if(m_binary)
00137                 writeHex(c, out);
00138             else {
00139                 if(cnt && m_cbuf[0] == (c == CR ? LF : CR))
00140                     m_cbuf.pop_front(); // eat it 
00141                 hardLineBrk(out);
00142             }
00143             break;
00144         case binary:
00145             if(!m_binary) m_binary = 1; // switch to binary mode
00146             writeHex(c, out);
00147             break;
00148         case unsafe:
00149             writeHex(c, out);
00150             break;
00151         }
00152     }
00153 public:
00154     /*! return the multiplier of the required (max) size of the output buffer 
00155      * when encoding */
00156     double codeSizeMultiplier() const
00157     {
00158         // worse case is *3 but we'll use the (euristic) average value of 1.5.
00159         // this may decrease performance when encoding messages with many 
00160         // non-ASCII (> 127) characters 
00161         return 1.5;
00162     }
00163     /*!
00164      Constructor
00165      \param isBinary if true all space and newline characters will be
00166      treated like binary chars and will be hex encoded (useful if you
00167      want to encode a binary file).
00168      */
00169     Encoder(bool isBinary = false)
00170     : m_pos(1), m_maxlen(default_maxlen), 
00171       m_binary(isBinary), m_cbuf(laBufSz) 
00172     {
00173     }
00174     /*! Returns the name of the codec ("Quoted-Printable") */
00175     const char* name() const { return "Quoted-Printable"; }
00176     /*! Returns the max line length */
00177     size_t maxlen()
00178     {
00179         return m_maxlen;
00180     }
00181     /*! 
00182         Set the max line length. No more then \p i chars will be 
00183         printed on one line.
00184     */
00185     void maxlen(size_t i)
00186     {
00187         m_maxlen = i;
00188     }
00189     /*! 
00190      Encodes [\p bit,\p eit) and write any encoded char to \p out.
00191      */
00192     template<typename InIt, typename OutIt>
00193     void process(InIt bit, InIt eit, OutIt out)
00194     {
00195         for(; bit != eit; ++bit)
00196             process(*bit, out);
00197         flush(out);
00198     }
00199     /*! 
00200      Encodes \p ic and write any encoded output char to \p out.
00201      \warning You must call flush() when all chars have been 
00202      processed by the encode funcion.
00203      \n
00204      \code
00205         while( (c = getchar()) != EOF )
00206             qp.process(c, out);    
00207         qp.flush();
00208      \endcode
00209      \n
00210      \sa flush()
00211      */
00212     template<typename OutIt>
00213     void process(char_type ic, OutIt& out)
00214     {
00215         m_cbuf.push_back(ic);
00216         if(m_cbuf.count() < laBufSz)
00217             return;
00218         char_type c = m_cbuf.front();
00219         m_cbuf.pop_front();
00220         encodeChar(c, out);
00221     }
00222     /*!
00223     Write to \p out any buffered encoded char.
00224      */
00225     template<typename OutIt>
00226     void flush(OutIt& out)
00227     {
00228         char_type c;
00229         while(!m_cbuf.empty())
00230         {
00231             c = m_cbuf.front();
00232             m_cbuf.pop_front();
00233             encodeChar(c, out);
00234         }
00235     }
00236 };
00237 
00238 /// quoted-printable decoder
00239 /*!
00240 
00241  \sa encode decode
00242  */
00243 class Decoder: public buffered_codec, public chainable_codec<Encoder>
00244 {
00245     enum { laBufSz = 80 }; // look-ahead buffer
00246     enum {
00247         sWaitingChar,
00248         sAfterEq,
00249         sWaitingFirstHex,
00250         sWaitingSecondHex,
00251         sBlank,
00252         sNewline,
00253         sOtherChar
00254     };
00255     size_t m_pos, m_maxlen;
00256 
00257 
00258     int m_state, m_nl;
00259     std::string m_prev;
00260 
00261     template<typename OutIt>
00262     void hardLineBrk(OutIt& out) const
00263     {
00264         *out = NL; ++out;
00265     }
00266     template<typename OutIt>
00267     void write(char_type ch, OutIt& out) const
00268     {
00269         *out = ch; ++out;
00270     }
00271     bool isnl(char_type c) const
00272     {
00273         return (c == CR || c == LF);
00274     }
00275     template<typename OutIt>
00276     void flushPrev(OutIt& out)
00277     {
00278         copy(m_prev.begin(), m_prev.end(), out);
00279         m_prev.clear();
00280     }
00281     int hex_to_int(char_type c) const
00282     {
00283         if( c >= '0' && c <='9') return c - '0';
00284         else if( c >= 'A' && c <='F') return c - 'A' + 10;
00285         else if( c >= 'a' && c <='f') return c - 'a' + 10;
00286         else return 0;
00287     }
00288     bool ishex(char_type c) const
00289     {
00290         return  (c >= '0' && c <= '9') || 
00291             (c >= 'A' && c <= 'F') || 
00292             (c >= 'a' && c <= 'f');
00293     }
00294     template<typename OutIt>
00295     void decodeChar(char_type c, OutIt& out)
00296     {
00297         for(;;)
00298         {
00299             switch(m_state)
00300             {
00301             case sBlank:
00302                 if(isblank(c))
00303                     m_prev.append(1,c);
00304                 else if(isnl(c)) {
00305                     // soft linebrk & ignore trailing blanks
00306                     m_prev.clear(); 
00307                     m_state = sWaitingChar;
00308                 } else {
00309                     flushPrev(out);
00310                     m_state = sWaitingChar;
00311                     continue;
00312                 }
00313                 return;
00314             case sAfterEq:
00315                 if(isblank(c))
00316                     m_prev.append(1,c);
00317                 else if(isnl(c)) {
00318                     // soft linebrk 
00319                     m_state = sNewline;
00320                     continue;
00321                 } else {
00322                     if(m_prev.length() > 1) 
00323                     {
00324                         // there're blanks after =
00325                         flushPrev(out);
00326                         m_state = sWaitingChar;
00327                     } else
00328                         m_state = sWaitingFirstHex;
00329                     continue;
00330                 }
00331                 return;
00332             case sWaitingFirstHex:
00333                 if(!ishex(c))
00334                 {
00335                     // malformed: =[not-hexch]
00336                     flushPrev(out);
00337                     write(c, out);
00338                     m_state = sWaitingChar;
00339                     return;
00340                 } else {
00341                     m_prev.append(1,c);
00342                     m_state = sWaitingSecondHex;
00343                 }
00344                 return;
00345             case sWaitingSecondHex:
00346                 if(!ishex(c))
00347                 { // malformed (=[hexch][not-hexch])
00348                     flushPrev(out);
00349                     write(c, out);
00350                 } else {
00351                     char_type oc, last;
00352                     assert(m_prev.length());
00353                     last = m_prev[m_prev.length()-1];
00354                     oc = hex_to_int(last) << 4 | 
00355                         hex_to_int(c) ;
00356                     write(oc,out);
00357                     m_prev.clear();
00358                 }
00359                 m_state = sWaitingChar;
00360                 return;
00361             case sNewline:
00362                 if(m_nl == 0)
00363                 {
00364                     m_nl = c;
00365                     return;
00366                 } else {
00367                     int len = m_prev.length();
00368                     if(!len || m_prev[0] != '=')
00369                         hardLineBrk(out);
00370                     m_prev.clear();
00371                     m_state = sWaitingChar;
00372                     bool is2Ch;
00373                     is2Ch = (c == (m_nl == CR ? LF : CR));
00374                     m_nl = 0;
00375                     if(is2Ch)
00376                         return;
00377                     continue;
00378                 }
00379             case sWaitingChar:
00380                 if(isblank(c))
00381                 {
00382                     m_state = sBlank;
00383                     continue;
00384                 } else if(isnl(c)) {
00385                     m_state = sNewline;
00386                     continue;
00387                 } else if(c == '=') {
00388                     m_state = sAfterEq;
00389                     m_prev.append(1, c);
00390                     return;
00391                 } else {
00392                     // WARNING: NOT ignoring chars > 126
00393                     // as suggested in rfc2045 6.7 note 4
00394                     if(c < 32 && c != TAB)
00395                     {
00396                         // malformed, CTRL ch found
00397                         // ignore (rfc2045 6.7 note 4)
00398                         return;
00399                     }
00400                     write(c,out);
00401                 }
00402                 return;
00403             }
00404         }
00405     }
00406 public:
00407     /*! Constructor */
00408     Decoder()
00409     : m_state(sWaitingChar), m_nl(0)
00410     {
00411     }
00412     /*! Returns the name of the codec ("Quoted-Printable") */
00413     const char* name() const { return "Quoted-Printable"; }
00414     /*! Returns the max line length */
00415     size_t maxlen()
00416     {
00417         return m_maxlen;
00418     }
00419     /*! 
00420     Set the max line length. No more then \p i chars will be 
00421     printed on one line.
00422     */
00423     void maxlen(size_t i)
00424     {
00425         m_maxlen = i;
00426     }
00427     /*! 
00428      Decodes [\p bit,\p eit) and write any decoded char to \p out.
00429      */
00430     template<typename InIt, typename OutIt>
00431     void process(InIt bit, InIt eit, OutIt out)
00432     {
00433         for(;bit != eit; ++bit)
00434             decodeChar(*bit, out);
00435         flush(out);
00436     }
00437     /*! 
00438      Decodes \p ic and write any decoded output char to \p out.
00439      
00440      \warning You must call flush() when all chars have been 
00441      processed by the code(...) funcion.
00442      \n
00443      \code
00444         while( (c = getchar()) != EOF )
00445             qp.process(c, out);    
00446         qp.flush();
00447      \endcode
00448      \n
00449      \sa flush()
00450      */
00451     template<typename OutIt>
00452     void process(char_type ic, OutIt& out)
00453     {
00454         decodeChar(ic, out);
00455     }
00456     /*!
00457     Write to \p out any buffered decoded char.
00458      */
00459     template<typename OutIt>
00460     void flush(OutIt& out)
00461     {
00462         /* m_prev can be (regex):
00463             empty: 
00464                 ok
00465             '=' : 
00466               malformed, '=' is last stream char, print as is
00467               (rfc2045 6.7 note 3)
00468             '=[a-zA-Z]'
00469               malformed, print as is
00470               (rfc2045 6.7 note 2)
00471             '= +'
00472               malformed, just print '=' and ignore trailing
00473               blanks (rfc2045 6.7 (3) )
00474         */
00475         int len = m_prev.length();
00476         if(len)
00477         {
00478             if(len == 1)
00479             {
00480                 assert(m_prev[0] == '=');
00481                 write('=', out);
00482             } else {
00483                 write('=', out);
00484                 if(m_prev[1] != ' ')
00485                     write(m_prev[1], out);
00486             }
00487         } else if(m_nl != 0) // stream ends with newline
00488             hardLineBrk(out);
00489 
00490     }
00491 };
00492 
00493 };
00494 
00495 
00496 } // namespace
00497 
00498 #endif
00499