unit TextEncodings;
interface
uses SysUtils, Classes;
type
TUTF32LEEncoding = class(TEncoding)
strict protected
function GetByteCount(Chars: PChar; CharCount: Integer): Integer; overload; override;
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetCharCount(Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
public
constructor Create; virtual;
function GetMaxByteCount(CharCount: Integer): Integer; override;
function GetMaxCharCount(ByteCount: Integer): Integer; override;
function GetPreamble: TBytes; override;
end;
TUTF32BEEncoding = class(TUTF32LEEncoding)
strict protected
function GetBytes(Chars: PChar; CharCount: Integer; Bytes: PByte; ByteCount: Integer): Integer; overload; override;
function GetChars(Bytes: PByte; ByteCount: Integer; Chars: PChar; CharCount: Integer): Integer; overload; override;
public
function GetPreamble: TBytes; override;
end;
var
UTF32LEEncoding: TUTF32LEEncoding;
UTF32BEEncoding: TUTF32BEEncoding;
type
THeuristicsSetting = (hsQuick, hsNormal, hsCareful);
TTextEncoding = (teASCII, teWindows8bitCodepage, teUTF8, teUTF16LE, teUTF16BE,
teUTF32LE, teUTF32BE);
TTextEncodings = set of TTextEncoding;
function GetVCLEncoding(ATextEncoding: TTextEncoding): TEncoding;
function GetEncodingFromVCL(AEncoding: TEncoding): TTextEncoding;
const
TextEncodingsStrs: array[TTextEncoding] of string =
('teASCII', 'teWindows8bitCodepage', 'teUTF8', 'teUTF16LE', 'teUTF16BE',
'teUTF32LE', 'teUTF32BE');
type
TLineBreakType = (lbtCRLF, lbtCR, lbtLF);
PTextFileFormatInfo = ^TTextFileFormatInfo;
TTextFileFormatInfo = record
TextEncoding: TTextEncoding;
MagicWord: boolean;
LineBreakType: TLineBreakType;
procedure SetEncoding(ATextEncoding: TTextEncoding);
procedure SetMagicWord(AMagicWord: boolean);
procedure SetLineBreakType(ALineBreakType: TLineBreakType);
function GetVCLEncoding: TEncoding;
procedure SetHasMagicWord(MagicWords: TTextEncodings);
end;
const
DEFAULT_TEXT_FILE_FORMAT_INFO: TTextFileFormatInfo = (TextEncoding: teUTF8;
MagicWord: false; LineBreakType: lbtCRLF);
var
PreferASCII: boolean = false;
PreferANSI: boolean = false;
function GuessEncodingOfFile(const FileName: TFileName;
out PositiveEncoding: TTextEncoding;
out PossibleEncodings: TTextEncodings;
out MagicWordsFound: TTextEncodings;
HeuristicsSetting: THeuristicsSetting = hsNormal): boolean;
implementation
uses Windows, Math;
function EncodingCount(const ASet: TTextEncodings): integer;
var
encoding: TTextEncoding;
begin
result := 0;
for encoding in ASet do
inc(result);
end;
function Swap32(Value: cardinal): cardinal; register;
asm
bswap eax
end;
function Swap16(Value: word): word; inline;
begin
result := (Value shl 8) or (Value shr 8);
end;
function MagicBytesMatching(const Buf, MagicBytes: array of byte): boolean;
var
i: Integer;
begin
result := false;
if Length(Buf) < Length(MagicBytes) then
Exit;
for i := 0 to high(MagicBytes) do
if Buf[i] <> MagicBytes[i] then
Exit;
result := true;
end;
function GuessEncodingOfFile(const FileName: TFileName;
out PositiveEncoding: TTextEncoding;
out PossibleEncodings: TTextEncodings;
out MagicWordsFound: TTextEncodings;
HeuristicsSetting: THeuristicsSetting = hsNormal): boolean;
const
BUFFER_SIZE = 1024*1024;
type
TUTF8ByteType = (utf8single, utf8leading, utf8continuation);
TUTF16WordType = (utf16single, utf16HighSurrogate, utf16LowSurrogate);
PWordArray = ^TWordArray;
TWordArray = array[0..BUFFER_SIZE div 2 - 1] of word;
PCardinalArray = ^TCardinalArray;
TCardinalArray = array[0..BUFFER_SIZE div 4 - 1] of cardinal;
const
UTF8: array[0..2] of byte = ($EF, $BB, $BF);
UTF16LE: array[0..1] of byte = ($FF, $FE);
UTF16BE: array[0..1] of byte = ($FE, $FF);
UTF32LE: array[0..3] of byte = ($FF, $FE, 0, 0);
UTF32BE: array[0..3] of byte = (0, 0, $FE, $FF);
var
teASCIIAnsiUTF8: TTextEncoding;
NontrivialPossibilities: TTextEncodings;
nulls, oddnulls, evennulls, nullwords: Int64;
crlf, sp: Int64;
wrd0020, wrd2000: Int64;
wrd000D, wrd0D00: Int64;
wrd000A, wrd0A00: Int64;
wrdLEPUA, wrdBEPUA: Int64;
FS: TFileStream;
bytes: array of byte;
words: PWordArray;
cardinals: PCardinalArray;
len: integer;
FSize, FSizeRead: Int64;
MagicUTF8,
MagicUTF16LE,
MagicUTF16BE,
MagicUTF32LE,
MagicUTF32BE: boolean;
i: integer;
utf8type: TUTF8ByteType;
utf8count: integer;
utf8oversizewarning: boolean;
utf8cp: integer;
utf16type: TUTF16WordType;
utf16unit: word;
SurrogatePairsLE: Int64;
SurrogatePairsBE: Int64;
SurrogatePairsMax: Int64;
utf16cardinal: cardinal;
e: TTextEncoding;
tc1: cardinal;
type
TUTF16TestValue = (utf16tvLE, utf16tvBE, utf16tvFail);
function TestUTF16: TUTF16TestValue;
begin
if (teUTF16LE in PossibleEncodings) and not (teUTF16BE in PossibleEncodings) then
Exit(utf16tvLE)
else if (teUTF16BE in PossibleEncodings) and not (teUTF16LE in PossibleEncodings) then
Exit(utf16tvBE)
else if (not (teUTF16BE in PossibleEncodings)) and (not (teUTF16LE in PossibleEncodings)) then
Exit(utf16tvFail);
if (oddnulls > 4) and (oddnulls > 2*evennulls) then
Exit(utf16tvLE)
else if (evennulls > 4) and (evennulls > 2*oddnulls) then
Exit(utf16tvBE);
if wrd000D > wrd0D00 then
Exit(utf16tvLE)
else if wrd0D00 > wrd000D then
Exit(utf16tvBE);
if wrd000A > wrd0A00 then
Exit(utf16tvLE)
else if wrd0A00 > wrd000A then
Exit(utf16tvBE);
if wrd0020 > wrd2000 then
Exit(utf16tvLE)
else if wrd2000 > wrd0020 then
Exit(utf16tvBE);
if (wrdLEPUA = 0) and (wrdBEPUA > 0) then
Exit(utf16tvLE)
else if (wrdLEPUA > 0) and (wrdBEPUA = 0) then
Exit(utf16tvBE);
Exit(utf16tvLE);
end;
function TestUTF16PUA: TUTF16TestValue;
begin
if (wrdLEPUA = 0) and (wrdBEPUA > 0) and (teUTF16LE in PossibleEncodings) then
Exit(utf16tvLE)
else if (wrdLEPUA > 0) and (wrdBEPUA = 0) and (teUTF16BE in PossibleEncodings) then
Exit(utf16tvBE)
else
Exit(utf16tvFail);
end;
begin
PositiveEncoding := teWindows8bitCodepage;
result := false;
PossibleEncodings := [teASCII, teWindows8bitCodepage, teUTF8, teUTF16LE,
teUTF16BE, teUTF32LE, teUTF32BE];
if PreferASCII then
teASCIIAnsiUTF8 := teASCII
else if PreferANSI then
teASCIIAnsiUTF8 := teWindows8bitCodepage
else
teASCIIAnsiUTF8 := teUTF8;
FS := TFileStream.Create(FileName, fmOpenRead);
try
FSize := FS.Size;
FSizeRead := FSize;
if FSize = 0 then
begin
PositiveEncoding := teASCIIAnsiUTF8;
Exit(true);
end;
MagicUTF8 := false;
MagicUTF16LE := false;
MagicUTF16BE := false;
MagicUTF32LE := false;
MagicUTF32BE := false;
SetLength(bytes, 4);
len := FS.Read(bytes[0], length(bytes));
SetLength(bytes, len);
MagicWordsFound := [];
if MagicBytesMatching(bytes, UTF32LE) then
begin
MagicUTF32LE := true;
Include(MagicWordsFound, teUTF32LE);
if HeuristicsSetting = hsQuick then
begin
PositiveEncoding := teUTF32LE;
Exit(true);
end;
end;
if MagicBytesMatching(bytes, UTF32BE) then
begin
MagicUTF32BE := true;
Include(MagicWordsFound, teUTF32BE);
if HeuristicsSetting = hsQuick then
begin
PositiveEncoding := teUTF32BE;
Exit(true);
end;
end;
if MagicBytesMatching(bytes, UTF8) then
begin
MagicUTF8 := true;
Include(MagicWordsFound, teUTF8);
if HeuristicsSetting = hsQuick then
begin
PositiveEncoding := teUTF8;
Exit(true);
end;
end;
if MagicBytesMatching(bytes, UTF16LE) then
begin
MagicUTF16LE := true;
Include(MagicWordsFound, teUTF16LE);
if HeuristicsSetting = hsQuick then
begin
PositiveEncoding := teUTF16LE;
Exit(true);
end;
end;
if MagicBytesMatching(bytes, UTF16BE) then
begin
MagicUTF16BE := true;
Include(MagicWordsFound, teUTF16BE);
if HeuristicsSetting = hsQuick then
begin
PositiveEncoding := teUTF16BE;
Exit(true);
end;
end;
if FSize mod 2 <> 0 then
begin
Exclude(PossibleEncodings, teUTF16LE);
Exclude(PossibleEncodings, teUTF16BE);
end;
if FSize mod 4 <> 0 then
begin
Exclude(PossibleEncodings, teUTF32LE);
Exclude(PossibleEncodings, teUTF32BE);
end;
Assert(BUFFER_SIZE mod 4 = 0);
nulls := 0;
oddnulls := 0;
evennulls := 0;
nullwords := 0;
crlf := 0;
sp := 0;
wrd0020 := 0;
wrd2000 := 0;
wrd000D := 0;
wrd0D00 := 0;
wrd000A := 0;
wrd0A00 := 0;
wrdLEPUA := 0;
wrdBEPUA := 0;
SurrogatePairsLE := 0;
SurrogatePairsBE := 0;
utf8type := utf8single;
utf8count := 0;
utf8oversizewarning := false;
utf16type := utf16single;
tc1 := GetTickCount;
FS.Position := 0;
while FS.Position < FSize do
begin
if PossibleEncodings = [teWindows8bitCodepage] then
break;
if HeuristicsSetting <> hsCareful then
if GetTickCount - tc1 > 1000 then
begin
FSizeRead := FS.Position;
break;
end;
SetLength(bytes, BUFFER_SIZE);
len := FS.Read(bytes[0], BUFFER_SIZE);
if len <> BUFFER_SIZE then
SetLength(bytes, len);
words := PWordArray(@bytes[0]);
cardinals := PCardinalArray(@bytes[0]);
for i := 0 to length(bytes) div 2 - 1 do
if words[i] = 0 then
inc(nullwords)
else if words[i] = $0D0A then
inc(crlf)
else if words[i] = $0020 then
inc(wrd0020)
else if words[i] = $2000 then
inc(wrd2000)
else if words[i] = $000D then
inc(wrd000D)
else if words[i] = $0D00 then
inc(wrd0D00)
else if words[i] = $000A then
inc(wrd000A)
else if words[i] = $0A00 then
inc(wrd0A00)
else if InRange(words[i], $E000, $F8FF) then
inc(wrdLEPUA)
else if InRange(Swap16(words[i]), $E000, $F8FF) then
inc(wrdBEPUA);
for i := 0 to high(bytes) do
if bytes[i] = 0 then
begin
inc(nulls);
if odd(i) then
inc(oddnulls);
end
else if bytes[i] = $20 then
inc(sp);
evennulls := nulls - oddnulls;
if teASCII in PossibleEncodings then
for i := 0 to high(bytes) do
if bytes[i] > 127 then
begin
Exclude(PossibleEncodings, teASCII);
break;
end;
if teUTF8 in PossibleEncodings then
begin
if not (teASCII in PossibleEncodings) then
for i := 0 to high(bytes) do
begin
if bytes[i] <= 127 then
begin
if utf8count > 0 then
begin
Exclude(PossibleEncodings, teUTF8);
break;
end;
utf8type := utf8single;
end
else if bytes[i] and $C0 = $80 then
begin
if (utf8type = utf8single) or (utf8count = 0) then
begin
Exclude(PossibleEncodings, teUTF8);
break;
end;
utf8type := utf8continuation;
dec(utf8count);
if utf8oversizewarning then
begin
inc(utf8cp, bytes[i] and $3F);
if (utf8count = 0) and (utf8cp > $10FFFF) then
begin
Exclude(PossibleEncodings, teUTF8);
break;
end;
end;
end
else
begin
if (utf8type = utf8leading) or (utf8count > 0) then
begin
Exclude(PossibleEncodings, teUTF8);
break;
end;
utf8type := utf8leading;
utf8oversizewarning := false;
if (bytes[i] and $E0 = $C0) then
utf8count := 1
else if (bytes[i] and $F0 = $E0) then
utf8count := 2
else if (bytes[i] and $F8 = $F0) then
begin
utf8count := 3;
utf8oversizewarning := true;
utf8cp := bytes[i] and $7;
end
else
begin
Exclude(PossibleEncodings, teUTF8);
break;
end;
end;
end;
end;
if teUTF16LE in PossibleEncodings then
for i := 0 to length(bytes) div 2 - 1 do
begin
utf16unit := words[i];
if (utf16unit <= $D7FF) or (utf16unit >= $E000) then
begin
if utf16type = utf16HighSurrogate then
begin
Exclude(PossibleEncodings, teUTF16LE);
break;
end;
utf16type := utf16single;
end
else if utf16unit < $DC00 then
begin
if utf16type = utf16HighSurrogate then
begin
Exclude(PossibleEncodings, teUTF16LE);
break;
end;
utf16type := utf16HighSurrogate;
end
else
begin
if utf16type <> utf16HighSurrogate then
begin
Exclude(PossibleEncodings, teUTF16LE);
break;
end;
utf16type := utf16LowSurrogate;
inc(SurrogatePairsLE);
end;
end;
if teUTF16BE in PossibleEncodings then
for i := 0 to length(bytes) div 2 - 1 do
begin
utf16unit := Swap16(words[i]);
if (utf16unit <= $D7FF) or (utf16unit >= $E000) then
begin
if utf16type = utf16HighSurrogate then
begin
Exclude(PossibleEncodings, teUTF16BE);
break;
end;
utf16type := utf16single;
end
else if utf16unit < $DC00 then
begin
if utf16type = utf16HighSurrogate then
begin
Exclude(PossibleEncodings, teUTF16BE);
break;
end;
utf16type := utf16HighSurrogate;
end
else
begin
if utf16type <> utf16HighSurrogate then
begin
Exclude(PossibleEncodings, teUTF16BE);
break;
end;
utf16type := utf16LowSurrogate;
inc(SurrogatePairsBE);
end;
end;
if teUTF32LE in PossibleEncodings then
for i := 0 to length(bytes) div 4 - 1 do
if cardinals[i] > $10FFFF then
begin
Exclude(PossibleEncodings, teUTF32LE);
break;
end;
if teUTF32BE in PossibleEncodings then
for i := 0 to length(bytes) div 4 - 1 do
begin
utf16cardinal := Swap32(cardinals[i]);
if utf16cardinal > $10FFFF then
begin
Exclude(PossibleEncodings, teUTF32BE);
break;
end;
end;
end ;
finally
FS.Free;
end;
SurrogatePairsMax := max(SurrogatePairsLE, SurrogatePairsBE);
NontrivialPossibilities := PossibleEncodings - [teWindows8bitCodepage];
if EncodingCount(NontrivialPossibilities) = 0 then
begin
PositiveEncoding := teWindows8bitCodepage;
Exit(true);
end;
if nulls = FSizeRead then
begin
PositiveEncoding := teWindows8bitCodepage;
Exit(true);
end;
if MagicUTF8 and (teUTF8 in PossibleEncodings) then
begin
PositiveEncoding := teUTF8;
Exit(true);
end
else if MagicUTF32LE and (teUTF32LE in PossibleEncodings) then
begin
PositiveEncoding := teUTF32LE;
Exit(true);
end
else if MagicUTF32BE and (teUTF32BE in PossibleEncodings) then
begin
PositiveEncoding := teUTF32BE;
Exit(true);
end
else if MagicUTF16LE and (teUTF16LE in PossibleEncodings) then
begin
PositiveEncoding := teUTF16LE;
Exit(true);
end
else if MagicUTF16BE and (teUTF16BE in PossibleEncodings) then
begin
PositiveEncoding := teUTF16BE;
Exit(true);
end;
if (teUTF8 in PossibleEncodings) and (not (teASCII in PossibleEncodings)) and (nulls = 0) then
begin
PositiveEncoding := teUTF8;
Exit(true);
end;
if (teASCII in PossibleEncodings) and (nulls = 0) then
begin
PositiveEncoding := teASCIIAnsiUTF8;
Exit(true);
end
else if (teASCII in PossibleEncodings) then
begin
if nullwords > 0 then
begin
if teUTF32LE in PossibleEncodings then
begin
PositiveEncoding := teUTF32LE;
Exit(true);
end;
if teUTF32BE in PossibleEncodings then
begin
PositiveEncoding := teUTF32BE;
Exit(true);
end;
if (nullwords = 2*nulls) or (nulls > 10*nullwords) then
begin
case TestUTF16 of
utf16tvLE:
begin
PositiveEncoding := teUTF16LE;
Exit(true);
end;
utf16tvBE:
begin
PositiveEncoding := teUTF16BE;
Exit(true);
end;
end;
end;
PositiveEncoding := teASCIIAnsiUTF8;
Exit(true);
end
else
begin
case TestUTF16 of
utf16tvLE:
begin
PositiveEncoding := teUTF16LE;
Exit(true);
end;
utf16tvBE:
begin
PositiveEncoding := teUTF16BE;
Exit(true);
end;
end;
end;
end
else if nulls = 0 then
begin
if crlf > 0 then
begin
PositiveEncoding := teWindows8bitCodepage;
Exit(true);
end;
if sp > FSizeRead div 12 then
begin
PositiveEncoding := teWindows8bitCodepage;
Exit(true);
end;
if (FSizeRead > 4096) or (SurrogatePairsMax > FSizeRead div (4*4)) then
begin
case TestUTF16 of
utf16tvLE:
begin
PositiveEncoding := teUTF16LE;
Exit(true);
end;
utf16tvBE:
begin
PositiveEncoding := teUTF16BE;
Exit(true);
end;
end;
end;
if SurrogatePairsMax = 0 then
begin
case TestUTF16PUA of
utf16tvLE:
begin
PositiveEncoding := teUTF16LE;
Exit(false);
end;
utf16tvBE:
begin
PositiveEncoding := teUTF16BE;
Exit(false);
end;
utf16tvFail:
begin
PositiveEncoding := teWindows8bitCodepage;
Exit(false);
end;
end;
end;
case TestUTF16 of
utf16tvLE:
begin
PositiveEncoding := teUTF16LE;
Exit(true);
end;
utf16tvBE:
begin
PositiveEncoding := teUTF16BE;
Exit(true);
end;
end;
PositiveEncoding := teWindows8bitCodepage;
Exit(true);
end
else
begin
if nullwords > 0 then
begin
if teUTF32LE in PossibleEncodings then
begin
PositiveEncoding := teUTF32LE;
Exit(true);
end;
if teUTF32BE in PossibleEncodings then
begin
PositiveEncoding := teUTF32BE;
Exit(true);
end;
PositiveEncoding := teWindows8bitCodepage;
Exit(true);
end
else
begin
if SurrogatePairsMax > FSizeRead div (4*20) then
begin
case TestUTF16 of
utf16tvLE:
begin
PositiveEncoding := teUTF16LE;
Exit(true);
end;
utf16tvBE:
begin
PositiveEncoding := teUTF16BE;
Exit(true);
end;
end;
end;
if teUTF32LE in PossibleEncodings then
begin
PositiveEncoding := teUTF32LE;
Exit(true);
end;
if teUTF32BE in PossibleEncodings then
begin
PositiveEncoding := teUTF32BE;
Exit(true);
end;
case TestUTF16 of
utf16tvLE:
begin
PositiveEncoding := teUTF16LE;
Exit(true);
end;
utf16tvBE:
begin
PositiveEncoding := teUTF16BE;
Exit(true);
end;
end;
PositiveEncoding := teWindows8bitCodepage;
Exit(true);
end;
end;
if result then
Assert(PositiveEncoding in PossibleEncodings, 'PositiveEncoding not in PossibleEncodings');
end;
function GetVCLEncoding(ATextEncoding: TTextEncoding): TEncoding;
begin
result := nil;
case ATextEncoding of
teASCII:
result := TEncoding.ASCII;
teWindows8bitCodepage:
result := TEncoding.Default;
teUTF8:
result := TEncoding.UTF8;
teUTF16LE:
result := TEncoding.Unicode;
teUTF16BE:
result := TEncoding.BigEndianUnicode;
teUTF32LE:
result := TextEncodings.UTF32LEEncoding;
teUTF32BE:
result := TextEncodings.UTF32BEEncoding;
end;
end;
function GetEncodingFromVCL(AEncoding: TEncoding): TTextEncoding;
begin
result := teWindows8bitCodepage;
if AEncoding = TEncoding.ASCII then
result := teASCII
else if AEncoding = TEncoding.Default then
result := teWindows8bitCodepage
else if AEncoding = TEncoding.UTF8 then
result := teUTF8
else if AEncoding = TEncoding.Unicode then
result := teUTF16LE
else if AEncoding = TEncoding.BigEndianUnicode then
result := teUTF16BE
else if AEncoding = TextEncodings.UTF32LEEncoding then
result := teUTF32LE
else if AEncoding = TextEncodings.UTF32BEEncoding then
result := teUTF32BE;
end;
function TTextFileFormatInfo.GetVCLEncoding: TEncoding;
begin
result := TextEncodings.GetVCLEncoding(Self.TextEncoding);
end;
procedure TTextFileFormatInfo.SetEncoding(ATextEncoding: TTextEncoding);
begin
TextEncoding := ATextEncoding;
end;
procedure TTextFileFormatInfo.SetHasMagicWord(MagicWords: TTextEncodings);
begin
MagicWord := TextEncoding in MagicWords;
end;
procedure TTextFileFormatInfo.SetLineBreakType(ALineBreakType: TLineBreakType);
begin
LineBreakType := ALineBreakType;
end;
procedure TTextFileFormatInfo.SetMagicWord(AMagicWord: boolean);
begin
MagicWord := AMagicWord;
end;
constructor TUTF32LEEncoding.Create;
begin
FIsSingleByte := false;
FMaxCharSize := 4;
end;
function TUTF32LEEncoding.GetByteCount(Chars: PChar;
CharCount: Integer): Integer;
begin
result := 4*CharCount;
end;
function TUTF32LEEncoding.GetBytes(Chars: PChar; CharCount: Integer;
Bytes: PByte; ByteCount: Integer): Integer;
var
i: Integer;
begin
for i := 0 to CharCount - 1 do
begin
PCardinal(Bytes)^ := cardinal(ord(Chars^));
inc(Chars);
inc(Bytes, 4);
end;
result := 4*CharCount;
end;
function TUTF32LEEncoding.GetCharCount(Bytes: PByte;
ByteCount: Integer): Integer;
begin
result := ByteCount div 4;
end;
function TUTF32LEEncoding.GetChars(Bytes: PByte; ByteCount: Integer;
Chars: PChar; CharCount: Integer): Integer;
var
i: Integer;
begin
for i := 0 to CharCount - 1 do
begin
Chars^ := Char(Word(PCardinal(Bytes)^));
inc(Chars);
inc(Bytes, 4);
end;
result := ByteCount div 4;
end;
function TUTF32LEEncoding.GetMaxByteCount(CharCount: Integer): Integer;
begin
result := 4*CharCount;
end;
function TUTF32LEEncoding.GetMaxCharCount(ByteCount: Integer): Integer;
begin
result := ByteCount div 4 + 1;
end;
function TUTF32LEEncoding.GetPreamble: TBytes;
begin
SetLength(result, 4);
result[0] := $FF;
result[1] := $FE;
result[2] := 0;
result[3] := 0;
end;
function TUTF32BEEncoding.GetBytes(Chars: PChar; CharCount: Integer;
Bytes: PByte; ByteCount: Integer): Integer;
var
i: Integer;
begin
for i := 0 to CharCount - 1 do
begin
PCardinal(Bytes)^ := Swap32(cardinal(ord(Chars^)));
inc(Chars);
inc(Bytes, 4);
end;
result := 4*CharCount;
end;
function TUTF32BEEncoding.GetChars(Bytes: PByte; ByteCount: Integer;
Chars: PChar; CharCount: Integer): Integer;
var
i: Integer;
begin
for i := 0 to CharCount - 1 do
begin
Chars^ := Char(Word(Swap32(PCardinal(Bytes)^)));
inc(Chars);
inc(Bytes, 4);
end;
result := ByteCount div 4;
end;
function TUTF32BEEncoding.GetPreamble: TBytes;
begin
SetLength(result, 4);
result[0] := 0;
result[1] := 0;
result[2] := $FE;
result[3] := $FF;
end;
initialization
UTF32LEEncoding := TUTF32LEEncoding.Create;
UTF32BEEncoding := TUTF32BEEncoding.Create;
finalization
UTF32LEEncoding.Free;
UTF32BEEncoding.Free;
end.