以下是大富翁cee写的一个实用函数,实现HTML到TXT格式的转换。
以下过程经过了几年的无数的测试,稳定性相当好。输出的结果也较美观。
function HtmlToTxt(const HTMLText:string;MarkLinks:boolean):string;
const
CR=#13#10;
var
NextToken,s0:string;
i:integer;
HelpIdx:integer;
inQuot:boolean; // 去除<script>段之用
InputLen:integer;
InputIdx:integer; // 指向输入字符的下一个待处理字符
inPre:boolean; // 表示是否在<pre>…</pre>段内
CurrLink:string;
function MakeStr(C: Char; N: Integer): string;
begin
if N < 1 then Result := ''
else begin
{$IFNDEF WIN32}
if N > 255 then N := 255;
{$ENDIF WIN32}
SetLength(Result, N);
FillChar(Result[1], Length(Result), C);
end;
end;
function NPos(const C: string; S: string; N: Integer): Integer;
var
I, P, K: Integer;
begin
Result := 0;
K := 0;
for I := 1 to N do begin
P := Pos(C, S);
Inc(K, P);
if (I = N) and (P > 0) then begin
Result := K;
Exit;
end;
if P > 0 then Delete(S, 1, P)
else Exit;
end;
end;
function ReplaceStr(const S, Srch, Replace: string): string;
var
I: Integer;
Source: string;
begin
Source := S;
Result := '';
repeat
I := Pos(Srch, Source);
if I > 0 then begin
Result := Result + Copy(Source, 1, I – 1) + Replace;
Source := Copy(Source, I + Length(Srch), MaxInt);
end
else Result := Result + Source;
until I <= 0;
end;
function UnixToDos(const s:string):string;
begin
result:=AdjustLineBreaks(s);
end;
// 取得下一段字符串
function GetNextToken(const s:string; const StartIdx:integer):string;
var
i:integer;
begin
if StartIdx>length(s) then
begin
result:='';
exit;
end;
result:=s[StartIdx];
if result='&' then
begin
for i:=StartIdx+1 to length(s) do
begin
if s[i] in ['&',' ',#13,'<'] then break;
result:=result+s[i];
if s[i]=';' then break;
end;
end
else if result='<' then
begin
for i:=StartIdx+1 to length(s) do
begin
result:=result+s[i];
if s[i]='>' then break;
end;
end
else
begin
for i:=StartIdx+1 to length(s) do
if s[i] in ['&','<'] then break
else result:=result+s[i];
end;
end;
// 输入:<a href="http://anjo.delphibbs.com">
// 输出:http://anjo.delphibbs.com
function GetLink(s:string):string;
var
LPos,RPos,LQuot,RQuot:integer;
begin
result:='';
// 去掉'….<'
LPos:=pos('<',s);
if LPos=0 then exit;
delete(s,1,LPos);
s:=Trim(s);
// 去掉'>….'
RPos:=pos('>',s);
if RPos=0 then exit;
delete(s,RPos,MaxInt);
if uppercase(copy(s,1,2))='A ' then
begin
LPos:=pos('HREF',uppercase(s));
if LPos=0 then exit;
LQuot:=NPos('"',s,1);
RQuot:=NPos('"',s,2);
if (LQuot<LPos) or (RQuot>RPos) then exit;
// 开头带'#'的超链接,视为无效
if s[LQuot+1]='#' then exit;
// 开头带'javascript:'的超链接,也视为无效
// 如:<div align=right><a href="javascript:window.close()"><IMG SRC="button_close.gif"></a></div>
if copy(s,LQuot+1,11)='javascript:' then exit;
result:=copy(s,LQuot+1,RQuot-LQuot-1);
end;
end;