{"id":651,"date":"2008-06-08T18:43:57","date_gmt":"2008-06-08T18:43:57","guid":{"rendered":"http:\/\/fanhaijun.com\/?p=651"},"modified":"2008-06-08T18:43:57","modified_gmt":"2008-06-08T18:43:57","slug":"html-to-txt-part-2","status":"publish","type":"post","link":"https:\/\/fanhaijun.com\/?p=651","title":{"rendered":"HTML To TXT (Part 2)"},"content":{"rendered":"<p>\/\/&nbsp;\u628a\u6240\u6709&#038;xxx\u7684\u8f6c\u4e49\uff1b\u6240\u6709&lt;xxx&gt;\u53d6\u6d88\uff1b\u5176\u5b83\u7167\u6837\u8fd4\u56de<br \/>&nbsp;&nbsp;function&nbsp;ConvertHTMLToken(const&nbsp;s:string;var&nbsp;inPre:boolean):string;<br \/>&nbsp;&nbsp;var<br \/>&nbsp;&nbsp;&nbsp;&nbsp;s0,s0_2,s0_3,s0_4:string;<br \/>&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;s=&#39;&#39;&nbsp;then<br \/>&nbsp;&nbsp;&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;result:=&#39;&#39;;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;exit;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;end;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;s[1]=&#39;&#038;&#39;&nbsp;then<br \/>&nbsp;&nbsp;&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;s0:=lowerCase(s);<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;result:=&#39;&#39;;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;s0=&#39;&nbsp;&#39;&nbsp;then&nbsp;result:=&#39;&nbsp;&#39;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0=&#39;&quot;&#39;&nbsp;then&nbsp;result:=&#39;&quot;&#39;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0=&#39;&gt;&#39;&nbsp;then&nbsp;result:=&#39;&gt;&#39;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0=&#39;&lt;&#39;&nbsp;then&nbsp;result:=&#39;&lt;&#39;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0=&#39;&middot;&#39;&nbsp;then&nbsp;result:=&#39;\u00b7&#39;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0=&#39;&trade;&#39;&nbsp;then&nbsp;result:=&#39;&nbsp;TM&nbsp;&#39;<br 
\/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0=&#39;&#038;copy;&#39;&nbsp;then&nbsp;result:=&#39;(c)&#39;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0=&#39;&#038;reg;&#39;&nbsp;then&nbsp;result:=&#39;(R)&#39;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0=&#39;&#038;amp&#39;&nbsp;then&nbsp;result:=&#39;&#038;&#39;;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;end<br \/>&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s[1]=&#39;&lt;&#39;&nbsp;then<br \/>&nbsp;&nbsp;&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;s0:=lowerCase(s);<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;s0_2:=copy(s0,1,2);<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;s0_3:=copy(s0,1,3);<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;s0_4:=copy(s0,1,4);<br \/>&nbsp;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;result:=&#39;&#39;;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\/\/&nbsp;\u5c06\u6240\u6709&lt;hr&gt;\u66ff\u6362\u6210\u4e3a&#39;&#8212;&#8212;&#39;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;s0=&#39;&lt;br&gt;&#39;&nbsp;then&nbsp;result:=CR<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0_4=&#39;&lt;pre&#39;&nbsp;then&nbsp;&nbsp;&nbsp;\/\/&nbsp;&lt;pre&nbsp;\u4e00\u5b9a\u8981\u5728&nbsp;&lt;p&nbsp;\u4e4b\u524d\u5224\u65ad\uff01<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;begin&nbsp;inPre:=true;result:=CR;&nbsp;end<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0_2=&#39;&lt;p&#39;&nbsp;then&nbsp;result:=CR+CR<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0_3=&#39;&lt;hr&#39;&nbsp;then&nbsp;result:=CR+MakeStr(&#39;-&#39;,40)+CR<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0_3=&#39;&lt;ol&#39;&nbsp;then&nbsp;result:=CR<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0_3=&#39;&lt;ul&#39;&nbsp;then&nbsp;result:=CR<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0_3=&#39;&lt;li&#39;&nbsp;then&nbsp;result:=&#39;\u00b7&#39;<br 
\/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0_4=&#39;&lt;\/li&#39;&nbsp;then&nbsp;result:=CR<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0_4=&#39;&lt;\/tr&#39;&nbsp;then&nbsp;result:=CR<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0=&#39;&lt;\/td&gt;&#39;&nbsp;then&nbsp;result:=#9<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0=&#39;&lt;title&gt;&#39;&nbsp;then&nbsp;result:=&#39;\u300a&#39;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0=&#39;&lt;\/title&gt;&#39;&nbsp;then&nbsp;result:=&#39;\u300b&#39;+CR+CR<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;s0=&#39;&lt;\/pre&gt;&#39;&nbsp;then&nbsp;inPre:=false<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;copy(s0,1,6)=&#39;&lt;table&#39;&nbsp;then&nbsp;result:=CR<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;MarkLinks&nbsp;and&nbsp;(s0[2]=&#39;a&#39;)&nbsp;then<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;CurrLink:=GetLink(s);<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;CurrLink&lt;&gt;&#39;&#39;&nbsp;then&nbsp;result:=&#39;[&#39;;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;end<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;MarkLinks&nbsp;and&nbsp;(s0=&#39;&lt;\/a&gt;&#39;)&nbsp;then<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;CurrLink&lt;&gt;&#39;&#39;&nbsp;then&nbsp;result:=format(&#39;&nbsp;%s&nbsp;]&#39;,[CurrLink]);<br \/>&nbsp;&nbsp;&nbsp;&nbsp;end<br \/>&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;if&nbsp;inPre&nbsp;then<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;result:=s<br \/>&nbsp;&nbsp;&nbsp;&nbsp;else&nbsp;\/\/&nbsp;\u4e0d\u5728&lt;pre&gt;..&lt;\/pre&gt;\u5185\uff0c\u5219\u5220\u9664\u6240\u6709CR<br 
\/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;result:=ReplaceStr(s,CR,&#39;&#39;);<br \/>&nbsp;&nbsp;end;<\/p>\n<p>begin<br \/>&nbsp;&nbsp;s0:=UnixToDos(HTMLText);<br \/>&nbsp;&nbsp;result:=&#39;&#39;;<br \/>&nbsp;&nbsp;InputLen:=length(s0);<br \/>&nbsp;&nbsp;InputIdx:=1;<br \/>&nbsp;&nbsp;inPre:=false;<br \/>&nbsp;&nbsp;CurrLink:=&#39;&#39;;<\/p>\n<p>&nbsp;&nbsp;while&nbsp;InputIdx&lt;=InputLen&nbsp;do<br \/>&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;NextToken:=GetNextToken(s0,InputIdx);<\/p>\n<p>&nbsp;&nbsp;&nbsp;&nbsp;\/\/&nbsp;\u53bb\u9664&lt;style&nbsp;&#8230;&gt;&nbsp;&#8212;&nbsp;&lt;\/style&gt;\u4e4b\u95f4\u7684\u5185\u5bb9<br \/>&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;lowercase(copy(NextToken,1,6))=&#39;&lt;style&#39;&nbsp;then<br \/>&nbsp;&nbsp;&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;while&nbsp;lowercase(NextToken)&lt;&gt;&#39;&lt;\/style&gt;&#39;&nbsp;do<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;inc(InputIdx,length(NextToken));<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;NextToken:=GetNextToken(s0,InputIdx);<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;end;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;inc(InputIdx,length(NextToken));<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;NextToken:=GetNextToken(s0,InputIdx);<br \/>&nbsp;&nbsp;&nbsp;&nbsp;end;<\/p>\n<p>&nbsp;&nbsp;&nbsp;&nbsp;\/\/&nbsp;\u53bb\u9664&lt;Script&nbsp;&#8230;&gt;&nbsp;&#8212;&nbsp;&lt;\/Script&gt;\u4e4b\u95f4\u7684\u5185\u5bb9<br \/>&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;lowercase(copy(NextToken,1,7))=&#39;&lt;script&#39;&nbsp;then<br \/>&nbsp;&nbsp;&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;inc(InputIdx,length(NextToken));<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;inQuot:=false;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;i:=InputIdx-1;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;while&nbsp;I&lt;InputLen&nbsp;do<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;begin<br 
\/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;inc(i);<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;s0[i]=&#39;&quot;&#39;&nbsp;then<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;inQuot:=not&nbsp;inQuot;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;continue;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;end;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;not&nbsp;inQuot&nbsp;then<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;\/\/&nbsp;\u53bb\u9664&lt;script&gt;\u6bb5\u91cc\u7684&lt;!--&nbsp;&#8230;&nbsp;--&gt;\u6ce8\u91ca\u6bb5,&nbsp;99.8.2<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;copy(s0,i,4)=&#39;&lt;!--&#39;&nbsp;then<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;HelpIdx:=pos(&#39;--&gt;&#39;,copy(s0,i+4,MaxInt));<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;HelpIdx&gt;0&nbsp;then<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;inc(i,4+HelpIdx+2);<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;end<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;else<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;i:=InputLen;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;break;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;end;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;end;<br 
\/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;if&nbsp;lowercase(copy(s0,i,9))=&#39;&lt;\/script&gt;&#39;&nbsp;then<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;begin<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;break;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;end;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;end;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;InputIdx:=i;<br \/>&nbsp;&nbsp;&nbsp;&nbsp;end;<\/p>\n<p>&nbsp;&nbsp;&nbsp;&nbsp;NextToken:=GetNextToken(s0,InputIdx);<br \/>&nbsp;&nbsp;&nbsp;&nbsp;inc(InputIdx,length(NextToken));<br \/>&nbsp;&nbsp;&nbsp;&nbsp;result:=result+ConvertHTMLToken(NextToken,inPre);<br \/>&nbsp;&nbsp;end;<br \/>end;<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\/\/&nbsp;\u628a\u6240\u6709&#038;xxx\u7684\u8f6c\u4e49\uff1b\u6240\u6709&lt;&hellip;<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"_lmt_disableupdate":"","_lmt_disable":"","footnotes":""},"categories":[104],"tags":[],"class_list":["post-651","post","type-post","status-publish","format-standard","hentry","category-technogy"],"_links":{"self":[{"href":"https:\/\/fanhaijun.com\/index.php?rest_route=\/wp\/v2\/posts\/651","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/fanhaijun.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/fanhaijun.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/fanhaijun.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/fanhaijun.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=651"}],"version-history":[{"count":0,"href":"https:\/\/fanhaijun.com\/index.php?rest_route=\/wp\/v2\/posts\/651\/revisions"}],"wp:attachment":[{"href":"https:\/\/fanhaijun.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=651"}],"wp:term
":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/fanhaijun.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=651"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/fanhaijun.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=651"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}