// Converts a single token of an HTML document into its plain-text form.
//   s     - the token: a character entity ('&xxx;'), a tag ('<xxx ...>') or
//           a run of ordinary text.
//   inPre - in/out flag: set when a '<pre' tag is seen, cleared on '</pre>'.
//           While it is set, ordinary text is returned unchanged; otherwise
//           every CR is removed from ordinary text.
// Returns the replacement text. Known entities are translated, recognised
// tags are replaced by line breaks / separators, every other entity or tag
// yields an empty string.
// Uses CR, MakeStr, ReplaceStr, GetLink, MarkLinks and CurrLink, all of which
// are declared outside this function.
function ConvertHTMLToken(const s:string;var inPre:boolean):string;
var
  s0,s0_2,s0_3,s0_4,entity:string;
begin
  result:='';
  if s='' then
    exit;
  if s[1]='&' then
  begin
    s0:=lowerCase(s);
    // Compare on the entity name without its terminating ';' so that both
    // '&nbsp;' and '&nbsp' are recognised.
    entity:=s0;
    if (length(entity)>1) and (entity[length(entity)]=';') then
      delete(entity,length(entity),1);
    if entity='&nbsp' then result:=' '
    else if entity='&quot' then result:='"'
    else if entity='&gt' then result:='>'
    else if entity='&lt' then result:='<'
    else if entity='&middot' then result:='·'
    else if entity='&trade' then result:=' TM '
    else if entity='&copy' then result:='(c)'
    else if entity='&reg' then result:='(R)'
    else if entity='&amp' then result:='&'
    // A token consisting of a lone ampersand is passed through.
    else if s0='&' then result:='&';
    // Any other entity is dropped (result stays empty).
  end
  else if s[1]='<' then
  begin
    s0:=lowerCase(s);
    s0_2:=copy(s0,1,2);
    s0_3:=copy(s0,1,3);
    s0_4:=copy(s0,1,4);
    if s0='<br>' then result:=CR
    else if s0_4='<pre' then // '<pre' must be tested before '<p'!
    begin
      inPre:=true;
      result:=CR;
    end
    else if s0_2='<p' then result:=CR+CR
    // Every <hr> becomes a line of 40 dashes on its own line.
    else if s0_3='<hr' then result:=CR+MakeStr('-',40)+CR
    else if s0_3='<ol' then result:=CR
    else if s0_3='<ul' then result:=CR
    else if s0_3='<li' then result:='·'
    else if s0_4='</li' then result:=CR
    else if s0_4='</tr' then result:=CR
    else if s0='</td>' then result:=#9
    else if s0='<title>' then result:='《'
    else if s0='</title>' then result:='》'+CR+CR
    else if s0='</pre>' then inPre:=false
    else if copy(s0,1,6)='<table' then result:=CR
    // The length test keeps s0[2] in range when the token is a lone '<'.
    else if MarkLinks and (length(s0)>=2) and (s0[2]='a') then
    begin
      // Remember the link target so that the closing </a> can print it.
      CurrLink:=GetLink(s);
      if CurrLink<>'' then result:='[';
    end
    else if MarkLinks and (s0='</a>') then
    begin
      if CurrLink<>'' then result:=format(' %s ]',[CurrLink]);
    end;
    // Every other tag is removed (result stays empty).
  end
  else if inPre then
    result:=s
  else // not inside <pre>..</pre>: delete every CR
    result:=ReplaceStr(s,CR,'');
end;
begin
  // Main conversion loop: walks the input token by token and appends the
  // plain-text form of each token to result.
  // UnixToDos, GetNextToken and all variables used here are declared outside
  // this block; UnixToDos presumably normalises line endings - confirm.
  s0:=UnixToDos(HTMLText);
  result:='';
  InputLen:=length(s0);
  InputIdx:=1;
  inPre:=false;
  CurrLink:='';
  while InputIdx<=InputLen do
  begin
    NextToken:=GetNextToken(s0,InputIdx);
    // Drop everything from <style ...> up to and including </style>.
    if lowercase(copy(NextToken,1,6))='<style' then
    begin
      // An empty token cannot advance InputIdx, so it also ends the scan;
      // this keeps an unterminated <style> block from looping forever.
      while (NextToken<>'') and (lowercase(NextToken)<>'</style>') do
      begin
        inc(InputIdx,length(NextToken));
        NextToken:=GetNextToken(s0,InputIdx);
      end;
      inc(InputIdx,length(NextToken));
      NextToken:=GetNextToken(s0,InputIdx);
    end;
    // Drop everything from <script ...> up to </script>.
    if lowercase(copy(NextToken,1,7))='<script' then
    begin
      inc(InputIdx,length(NextToken));
      inQuot:=false;
      i:=InputIdx-1;
      // Scan character by character, tracking whether we are inside a
      // double-quoted string.
      while i<InputLen do
      begin
        inc(i);
        if s0[i]='"' then
        begin
          inQuot:=not inQuot;
          continue;
        end;
        if not inQuot then
          // Skip <!-- ... --> comment sections inside <script> (added 99.8.2).
          if copy(s0,i,4)='<!--' then
          begin
            HelpIdx:=pos('-->',copy(s0,i+4,MaxInt));
            if HelpIdx>0 then
            begin
              // 4 = length of '<!--'; HelpIdx+2 moves past the closing '-->'.
              inc(i,4+HelpIdx+2);
            end
            else
            begin
              // Unterminated comment: treat the rest of the input as script.
              i:=InputLen;
              break;
            end;
          end;
        if lowercase(copy(s0,i,9))='</script>' then
        begin
          break;
        end;
      end;
      // Resume tokenising at the position where the scan stopped.
      InputIdx:=i;
    end;
    NextToken:=GetNextToken(s0,InputIdx);
    // A zero-length token would never advance InputIdx; stop instead of
    // spinning forever.
    if NextToken='' then
      break;
    inc(InputIdx,length(NextToken));
    result:=result+ConvertHTMLToken(NextToken,inPre);
  end;
end;