1 
  
    2 
  
    3 
  
    4 
  
    5 
  
    6 using System.Collections;
  
    7 
  
    8 namespace System.Unicode
  
    9 {
  
   10     public class UnicodeEngine
  
   11     {
  
   12         public nothrow UnicodeEngine() : resultReady(false), result('\0'), state(0)
  
   13         {
  
   14         }
  
   15         public void Put(byte x)
  
   16         {
  
   17             switch (state)
  
   18             {
  
   19                 case 0:
  
   20                 {
  
   21                     resultReady = false;
  
   22                     if ((x & 0x80u) == 0u)
  
   23                     {
  
   24                         result = cast<uchar>(x);
  
   25                         resultReady = true;
  
   26                     }
  
   27                     else if ((x & 0xE0u) == 0xC0u)
  
   28                     {
  
   29                         bytes[0] = x;
  
   30                         state = 1;
  
   31                     }
  
   32                     else if ((x & 0xF0u) == 0xE0u)
  
   33                     {
  
   34                         bytes[0] = x;
  
   35                         state = 2;
  
   36                     }
  
   37                     else if ((x & 0xF8u) == 0xF0u)
  
   38                     {
  
   39                         bytes[0] = x;
  
   40                         state = 4;
  
   41                     }
  
   42                     else
  
   43                     {
  
   44                         throw UnicodeException("invalid UTF-8 sequence");
  
   45                     }
  
   46                     break;
  
   47                 }
  
   48                 case 1:
  
   49                 {
  
   50                     result = cast<uchar>(0);
  
   51                     bytes[1] = x;
  
   52                     byte b1 = bytes[1];
  
   53                     if ((b1 & 0xC0u) != 0x80u)
  
   54                     {
  
   55                         throw UnicodeException("invalid UTF-8 sequence");
  
   56                     }
  
   57                     byte shift = 0u;
  
   58                     for (byte i = 0u; i < 6u; ++i;)
  
   59                     {
  
   60                         byte bit = b1 & 1u;
  
   61                         b1 = b1 >> 1u;
  
   62                         result = cast<uchar>(cast<uint>(result) | (cast<uint>(bit) << shift));
  
   63                         ++shift;
  
   64                     }
  
   65                     byte b0 = bytes[0];
  
   66                     for (byte i = 0u; i < 5u; ++i;)
  
   67                     {
  
   68                         byte bit = b0 & 1u;
  
   69                         b0 = b0 >> 1u;
  
   70                         result = cast<uchar>(cast<uint>(result) | (cast<uint>(bit) << shift));
  
   71                         ++shift;
  
   72                     }
  
   73                     resultReady = true;
  
   74                     state = 0;
  
   75                     break;
  
   76                 }
  
   77                 case 2:
  
   78                 {
  
   79                     bytes[1] = x;
  
   80                     state = 3;
  
   81                     break;
  
   82                 }
  
   83                 case 3:
  
   84                 {
  
   85                     bytes[2] = x;
  
   86                     result = cast<uchar>(0);
  
   87                     byte b2 = bytes[2];
  
   88                     if ((b2 & 0xC0u) != 0x80u)
  
   89                     {
  
   90                         throw UnicodeException("invalid UTF-8 sequence");
  
   91                     }
  
   92                     byte shift = 0u;
  
   93                     for (byte i = 0u; i < 6u; ++i;)
  
   94                     {
  
   95                         byte bit = b2 & 1u;
  
   96                         b2 = b2 >> 1u;
  
   97                         result = cast<uchar>(cast<uint>(result) | (cast<uint>(bit) << shift));
  
   98                         ++shift;
  
   99                     }
  
  100                     byte b1 = bytes[1];
  
  101                     if ((b1 & 0xC0u) != 0x80u)
  
  102                     {
  
  103                         throw UnicodeException("invalid UTF-8 sequence");
  
  104                     }
  
  105                     for (byte i = 0u; i < 6u; ++i;)
  
  106                     {
  
  107                         byte bit = b1 & 1u;
  
  108                         b1 = b1 >> 1u;
  
  109                         result = cast<uchar>(cast<uint>(result) | (cast<uint>(bit) << shift));
  
  110                         ++shift;
  
  111                     }
  
  112                     byte b0 = bytes[0];
  
  113                     for (byte i = 0u; i < 4u; ++i;)
  
  114                     {
  
  115                         byte bit = b0 & 1u;
  
  116                         b0 = b0 >> 1u;
  
  117                         result = cast<uchar>(cast<uint>(result) | (cast<uint>(bit) << shift));
  
  118                         ++shift;
  
  119                     }
  
  120                     resultReady = true;
  
  121                     state = 0;
  
  122                     break;
  
  123                 }
  
  124                 case 4:
  
  125                 {
  
  126                     bytes[1] = x;
  
  127                     state = 5;
  
  128                     break;
  
  129                 }
  
  130                 case 5:
  
  131                 {
  
  132                     bytes[2] = x;
  
  133                     state = 6;
  
  134                     break;
  
  135                 }
  
  136                 case 6:
  
  137                 {
  
  138                     bytes[3] = x;
  
  139                     result = cast<uchar>(0);
  
  140                     byte b3 = bytes[3];
  
  141                     if ((b3 & 0xC0u) != 0x80u)
  
  142                     {
  
  143                         throw UnicodeException("invalid UTF-8 sequence");
  
  144                     }
  
  145                     byte shift = 0u;
  
  146                     for (byte i = 0u; i < 6u; ++i;)
  
  147                     {
  
  148                         byte bit = b3 & 1u;
  
  149                         b3 = b3 >> 1u;
  
  150                         result = cast<uchar>(cast<uint>(result) | (cast<uint>(bit) << shift));
  
  151                         ++shift;
  
  152                     }
  
  153                     byte b2 = bytes[2];
  
  154                     if ((b2 & 0xC0u) != 0x80u)
  
  155                     {
  
  156                         throw UnicodeException("invalid UTF-8 sequence");
  
  157                     }
  
  158                     for (byte i = 0u; i < 6u; ++i;)
  
  159                     {
  
  160                         byte bit = b2 & 1u;
  
  161                         b2 = b2 >> 1u;
  
  162                         result = cast<uchar>(cast<uint>(result) | (cast<uint>(bit) << shift));
  
  163                         ++shift;
  
  164                     }
  
  165                     byte b1 = bytes[1];
  
  166                     if ((b1 & 0xC0u) != 0x80u)
  
  167                     {
  
  168                         throw UnicodeException("invalid UTF-8 sequence");
  
  169                     }
  
  170                     for (byte i = 0u; i < 6u; ++i;)
  
  171                     {
  
  172                         byte bit = b1 & 1u;
  
  173                         b1 = b1 >> 1u;
  
  174                         result = cast<uchar>(cast<uint>(result) | (cast<uint>(bit) << shift));
  
  175                         ++shift;
  
  176                     }
  
  177                     byte b0 = bytes[0];
  
  178                     for (byte i = 0u; i < 3u; ++i;)
  
  179                     {
  
  180                         byte bit = b0 & 1u;
  
  181                         b0 = b0 >> 1u;
  
  182                         result = cast<uchar>(cast<uint>(result) | (cast<uint>(bit) << shift));
  
  183                         ++shift;
  
  184                     }
  
  185                     resultReady = true;
  
  186                     state = 0;
  
  187                     break;
  
  188                 }
  
  189             }
  
  190         }
  
  191         public inline nothrow bool ResultReady() const
  
  192         {
  
  193             return resultReady;
  
  194         }
  
  195         public nothrow uchar Get()
  
  196         {
  
  197             return result;
  
  198         }
  
  199         private bool resultReady;
  
  200         private uchar result;
  
  201         private int state;
  
  202         private byte[4] bytes;
  
  203     }
  
  204 }