Logo Search packages:      
Sourcecode: pantomime version File versions  Download package

UTF8.m

/*
**  UTF8.m
**
**  Copyright (c) 2001, 2002
**
**  Author: Vincent Ricard <vricard@wanadoo.fr>
**
**  This library is free software; you can redistribute it and/or
**  modify it under the terms of the GNU Lesser General Public
**  License as published by the Free Software Foundation; either
**  version 2.1 of the License, or (at your option) any later version.
**  
**  This library is distributed in the hope that it will be useful,
**  but WITHOUT ANY WARRANTY; without even the implied warranty of
**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
**  Lesser General Public License for more details.
**  
**  You should have received a copy of the GNU Lesser General Public
**  License along with this library; if not, write to the Free Software
**  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

#include <Pantomime/UTF8.h>

#include <Pantomime/Constants.h>

#include <Foundation/NSString.h>

#include <ctype.h>
#include <stdio.h>

// Returns the numeric value (0-15) of the hexadecimal digit ch, or -1 when
// ch is not one of the ASCII characters 0-9, a-f or A-F.
// Explicit range tests are used instead of the <ctype.h> predicates because
// those are locale-dependent and undefined for values that do not fit in an
// unsigned char, which a unichar can easily exceed.
static int UTF8HexDigitValue(unichar ch)
{
  if ( ch >= '0' && ch <= '9' )
    {
      return ch - '0';
    }
  if ( ch >= 'a' && ch <= 'f' )
    {
      return 10 + ch - 'a';
    }
  if ( ch >= 'A' && ch <= 'F' )
    {
      return 10 + ch - 'A';
    }

  return -1;
}

// Reads the three-character escape "=XX" (X being a hexadecimal digit) that
// starts at position index of theString, whose length is length.
// Returns the escaped byte (0-255), or -1 when there is no complete, well
// formed escape at that position. Never reads outside the string.
// As in the original scan, an escape is only recognized when
// index + 2 < length.
static int UTF8EscapedByteAtIndex(NSString *theString, unsigned int index, unsigned int length)
{
  int high, low;

  if ( index + 2 >= length || [theString characterAtIndex: index] != '=' )
    {
      return -1;
    }

  high = UTF8HexDigitValue([theString characterAtIndex: index + 1]);
  low = UTF8HexDigitValue([theString characterAtIndex: index + 2]);

  if ( high < 0 || low < 0 )
    {
      return -1;
    }

  return (high << 4) | low;
}

@implementation UTF8

// Converts the two hexadecimal digits c1 (high nibble) and c2 (low nibble)
// into the byte 0xc1c2.
// c1 and c2 must have been validated as hexadecimal digits before calling
// this method; a character that is not a hexadecimal digit contributes 0.
+ (unsigned char) convertHexaCodeToByte: (unsigned char) c1 and: (unsigned char) c2
{
  int high = UTF8HexDigitValue(c1);
  int low = UTF8HexDigitValue(c2);

  if ( high < 0 )
    {
      high = 0;
    }
  if ( low < 0 )
    {
      low = 0;
    }

  return (unsigned char)((high << 4) | low);
}

// Decodes the "=XX" escapes of theString, where consecutive escapes hold the
// bytes of one UTF-8 encoded character, and returns the decoded string.
//
// - "=XX" with XX < 0x80 yields the corresponding ASCII character.
// - 2, 3 and 4 byte UTF-8 sequences are decoded; code points above U+FFFF
//   are stored as a UTF-16 surrogate pair since unichar is 16 bits wide.
// - Malformed input (stray continuation byte, truncated sequence, overlong
//   encoding, encoded surrogate, value above U+10FFFF, obsolete 5/6 byte
//   forms) is left untouched in the result instead of being turned into
//   garbage characters.
// - A decoded character is never rescanned, so "=3D41" gives "=41".
//
// Returns nil when theString is nil; otherwise an autoreleased string.
+ (NSString *) decodeString: (NSString *) theString
{
  NSMutableString *result;
  unsigned int length, i, runStart;

  if ( theString == nil )
    {
      return nil;
    }

  length = (unsigned int)[theString length];
  result = [[NSMutableString alloc] initWithCapacity: length];

  // [runStart, i) is the pending run of characters to be copied verbatim
  runStart = 0;
  i = 0;

  while ( i < length )
    {
      unsigned long codePoint = 0, minimum = 0;
      unsigned int consumed = 3, count;
      unichar units[2];
      int lead, extra, k;
      BOOL valid;

      lead = UTF8EscapedByteAtIndex(theString, i, length);

      if ( lead < 0 )
        {
          // Ordinary character: it stays in the pending verbatim run
          i++;
          continue;
        }

      // The lead byte gives the sequence length and the smallest code point
      // that legitimately needs that length (used to reject overlong forms)
      if ( lead < 0x80 )
        {
          // 0xxxxxxx
          extra = 0;
          codePoint = (unsigned long)lead;
          minimum = 0;
        }
      else if ( (lead & 0xE0) == 0xC0 )
        {
          // 110xxxxx 10xxxxxx
          extra = 1;
          codePoint = (unsigned long)(lead & 0x1F);
          minimum = 0x80;
        }
      else if ( (lead & 0xF0) == 0xE0 )
        {
          // 1110xxxx 10xxxxxx 10xxxxxx
          extra = 2;
          codePoint = (unsigned long)(lead & 0x0F);
          minimum = 0x800;
        }
      else if ( (lead & 0xF8) == 0xF0 )
        {
          // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
          extra = 3;
          codePoint = (unsigned long)(lead & 0x07);
          minimum = 0x10000;
        }
      else
        {
          // 10xxxxxx (continuation byte) or 0xF8-0xFF: not a valid lead byte
          extra = -1;
        }

      valid = (extra >= 0) ? YES : NO;

      for ( k = 0; valid && k < extra; k++ )
        {
          int next = UTF8EscapedByteAtIndex(theString, i + consumed, length);

          if ( next < 0 || (next & 0xC0) != 0x80 )
            {
              // Missing escape or not a 10xxxxxx continuation byte
              valid = NO;
            }
          else
            {
              codePoint = (codePoint << 6) | (unsigned long)(next & 0x3F);
              consumed += 3;
            }
        }

      if ( valid &&
           (codePoint < minimum || codePoint > 0x10FFFF ||
            (codePoint >= 0xD800 && codePoint <= 0xDFFF)) )
        {
          // Overlong encoding, out of range value or encoded surrogate
          valid = NO;
        }

      if ( !valid )
        {
          // Keep the text as it is; only step over the '=' so that any
          // following escape still gets its own chance to be decoded
          i++;
          continue;
        }

      // Flush the verbatim characters that precede this escape sequence
      if ( i > runStart )
        {
          [result appendString: [theString substringWithRange: NSMakeRange(runStart, i - runStart)]];
        }

      if ( codePoint > 0xFFFF )
        {
          // Outside the BMP: encode as a UTF-16 surrogate pair
          codePoint -= 0x10000;
          units[0] = (unichar)(0xD800 + (codePoint >> 10));
          units[1] = (unichar)(0xDC00 + (codePoint & 0x3FF));
          count = 2;
        }
      else
        {
          units[0] = (unichar)codePoint;
          count = 1;
        }

      [result appendString: [NSString stringWithCharacters: units length: count]];

      i += consumed;
      runStart = i;
    }

  // Flush whatever verbatim text follows the last decoded sequence
  if ( runStart < length )
    {
      [result appendString: [theString substringWithRange: NSMakeRange(runStart, length - runStart)]];
    }

  return AUTORELEASE(result);
}

@end

Generated by  Doxygen 1.6.0   Back to index