mirror of
https://github.com/Lastorder-DC/rhymix.git
synced 2026-01-08 03:01:43 +09:00
issue 2239 Object, Embed, Iframe source allow by white list
git-svn-id: http://xe-core.googlecode.com/svn/branches/1.5.3.2@11024 201d5d3c-b55e-5fd7-737f-ddc643e51545
This commit is contained in:
parent
8e64ac6e2b
commit
927c944ff3
25 changed files with 1044 additions and 2 deletions
230
classes/security/conf/embedWhiteUrl.xml
Normal file
230
classes/security/conf/embedWhiteUrl.xml
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<whiteurl>
|
||||
<embed>
|
||||
<domain name="http://www.naver.com" isNHN="true" desc="네이버">
|
||||
<pattern>http://serviceapi.nmv.naver.com/</pattern>
|
||||
<pattern>http://scrap.ad.naver.com/</pattern>
|
||||
<pattern>http://event.dn.naver.com/sbsplayer/vmplayer.xap</pattern>
|
||||
</domain>
|
||||
<domain name="" isNHN="true" desc="네이버 뮤직 서비스">
|
||||
<pattern>http://test-player.naver.com/naverPlayer/posting/</pattern>
|
||||
<pattern>http://alpha-player.naver.com/naverPlayer/posting/</pattern>
|
||||
<pattern>http://beta-player.naver.com/naverPlayer/posting/</pattern>
|
||||
<pattern>http://musicplayer.naver.com/naverPlayer/posting/</pattern>
|
||||
<pattern>http://player.music.naver.com/naverPlayer/posting/</pattern>
|
||||
<pattern>http://dev.player.music.naver.com/</pattern>
|
||||
<pattern>http://test.player.music.naver.com/</pattern>
|
||||
<pattern>http://qa.player.music.naver.com/</pattern>
|
||||
<pattern>http://staging.player.music.naver.com/</pattern>
|
||||
<pattern>http://alpha.player.music.naver.com/</pattern>
|
||||
<pattern>http://beta.player.music.naver.com/</pattern>
|
||||
<pattern>http://stage.player.music.naver.com/</pattern>
|
||||
</domain>
|
||||
<domain name="" isNHN="true" desc="네이버 아이두게임">
|
||||
<pattern>http://dev-idogame.hangame.com/idogame/ClientBin/iDoGamePlayer.xap</pattern>
|
||||
<pattern>http://idogame.hangame.com/idogame/ClientBin/iDoGamePlayer.xap</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.daum.net" desc="다음">
|
||||
<pattern>http://flvs.daum.net/flvPlayer.swf</pattern>
|
||||
<pattern>http://api.v.daum.net/</pattern>
|
||||
<pattern>http://tvpot.daum.net/playlist/playlist.swf</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.mncast.com" desc="엠엔캐스트">
|
||||
<pattern>http://dory.mncast.com/mncHMovie.swf</pattern>
|
||||
<pattern>http://dory.mncast.com/mncastPlayer.swf</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.youtube.com" desc="Youtube">
|
||||
<pattern>http://www.youtube.com/v/</pattern>
|
||||
<pattern>http://www.youtube-nocookie.com/</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.mgoon.com" desc="엠군">
|
||||
<pattern>http://play.mgoon.com/Video/</pattern>
|
||||
<pattern>http://doc.mgoon.com/player/</pattern>
|
||||
<pattern>http://play.mgoon.com/Game/</pattern>
|
||||
<pattern>http://play.mgoon.com/Photo/</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.pandora.tv" desc="판도라TV">
|
||||
<pattern>http://flvr.pandora.tv/flv2pan/</pattern>
|
||||
<pattern>http://imgcdn.pandora.tv/gplayer/pandora_EGplayer.swf</pattern>
|
||||
<pattern>http://imgcdn.pandora.tv/gplayer/flJal.swf</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.tagstory.com" desc="태그스토리">
|
||||
<pattern>http://play.tagstory.com/player/</pattern>
|
||||
<pattern>http://www.tagstory.com/player/basic/</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.pullbbang.com" desc="풀빵닷컴">
|
||||
<pattern>http://play.pullbbang.com/</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="미래에셋 미디어">
|
||||
<pattern>http://media.miraeasset.com/swf/</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="서울시 인터넷방송">
|
||||
<pattern>http://tv.seoul.go.kr/common/player/posting/window_media_player.asp</pattern>
|
||||
<pattern>http://stream.seoul.go.kr</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="게임스팟 동영상">
|
||||
<pattern>mms://vod.gamespot.lgcdn.com/</pattern>
|
||||
<pattern>http://image.com.com/</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="광고정보센터">
|
||||
<pattern>mms://media.adic.co.kr/</pattern>
|
||||
<pattern>http://static.adwaple.net/</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="비법닷컴">
|
||||
<pattern>http://www.vipup.com/scrap/scrap.asp</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="영상 역사관">
|
||||
<pattern>mms://125.60.2.110/e_history/</pattern>
|
||||
<pattern>mms://218.38.152.33/e_history/</pattern>
|
||||
<pattern>http://125.60.2.110/e_history/</pattern>
|
||||
<pattern>http://218.38.152.33/e_history/</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="중소기업청">
|
||||
<pattern>http://www.smba.go.kr/mov/</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="트라트라고">
|
||||
<pattern>http://ucc.tlatlago.com/html/uccPlayer/</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="싸이월드">
|
||||
<pattern>http://dbi.video.cyworld.com/v.sk/</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="이글루스 동영상">
|
||||
<pattern>http://v.egloos.com/v.sk/</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="뮤직쉐이크">
|
||||
<pattern>http://www.musicshake.com/musicshakePlayer.swf</pattern>
|
||||
<pattern>http://eng.musicshake.com/musicshakePlayer.swf</pattern>
|
||||
<pattern>http://us.musicshake.com/musicshakePlayer.swf</pattern>
|
||||
<pattern>http://ip.musicshake.com/musicshakePlayer.swf</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="아프리카">
|
||||
<pattern>http://live.afreeca.com:8057/</pattern>
|
||||
<pattern>http://afbbs.afreeca.com:8080/</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="플레이NC">
|
||||
<pattern>http://static.plaync.co.kr/plaza/</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="XTM">
|
||||
<pattern>http://img.xtmtv.com/images/</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="아이서브">
|
||||
<pattern>http://tv.co.kr/pum/tvcell_basic.swf</pattern>
|
||||
<pattern>http://tv.co.kr/pum/tvcell_mini.swf</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="UC씽">
|
||||
<pattern>http://ucsing.mnet.com/L_swf/ucsing_player.swf</pattern>
|
||||
<pattern>http://flvfile.mnet.com</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.snaps.co.kr" desc="SNAPS">
|
||||
<pattern>http://www.snaps.co.kr/swf/LinkedApp.swf</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.nate.com" desc="네이트">
|
||||
<pattern>http://v.nate.com/v.sk/</pattern>
|
||||
<pattern>http://w.blogdoc.nate.com/</pattern>
|
||||
<pattern>http://blogdoc.nate.com/flash/blogdoc_widget_reco.swf</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.paoin.com" desc="파오인 신문지면 뷰어">
|
||||
<pattern>http://www.paoin.com/Common/swf/ArticleViewer02.swf</pattern>
|
||||
<pattern>http://thumb.paoin.com/paoweb/common/flash/ArticleViewer02.swf</pattern>
|
||||
<pattern>http://thumb.paoin.com/paoweb/common/flash/ArticleShare.swf</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.atzine.com" desc="엣진 서비스">
|
||||
<pattern>http://www.atzine.com/swf/TakeOutWrapper.swf?</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.ohmynews.com" desc="오마이뉴스">
|
||||
<pattern>http://www.ohmynews.com/</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.jjanglive.com" desc="짱라이브 위젯">
|
||||
<pattern>http://www.jjanglive.com/flash/webClient.swf</pattern>
|
||||
<pattern>http://www.jjanglive.com/flash/AdShowClient.swf</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.pmang.com" desc="피망">
|
||||
<pattern>http://file.pmang.com/images/pmang/fifaonline/season2/img/squad/squadmaker_ot.swf</pattern>
|
||||
<pattern>http://fifaonline.pmang.com/squad/t.nwz</pattern>
|
||||
<pattern>http://file.pmang.com/images/pmang/gamepub/player/pm_player.swf</pattern>
|
||||
<pattern>http://www.pmang.com/gamepub/media/player.nwz</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.ccmpia.com" desc="CCMPIA">
|
||||
<pattern>http://www.ccmpia.com/scripts/bgm2.php</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.kbs.co.kr" desc="KBS">
|
||||
<pattern>http://www.kbs.co.kr/zzim/vmplayer/vmplayer.xap</pattern>
|
||||
<pattern>http://vmark.kbs.co.kr/zzim/vmplayer/vmplayer.xap</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.sbs.co.kr" desc="SBS">
|
||||
<pattern>http://netv.sbs.co.kr/sbox/silverlight/ClientBin/NeTVPlayer.xap</pattern>
|
||||
<pattern>http://news.sbs.co.kr/</pattern>
|
||||
<pattern>http://wizard2.sbs.co.kr/</pattern>
|
||||
<pattern>http://sbsplayer.sbs.co.kr/</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.imbc.com" desc="MBC">
|
||||
<pattern>http://onemore.imbc.com/ClientBin/oneplus.xap</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.xtmtv.com" desc="XTM">
|
||||
<pattern>http://www.xtmtv.com/xtmPlayer/javascript/XTM_Scrap_Player.swf</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.chtvn.com" desc="TVN">
|
||||
<pattern>http://player.chtvn.com/tvN_Scrap_Player.swf?</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.detailview.co.kr" desc="디테일뷰">
|
||||
<pattern>http://storage.detailview.co.kr/</pattern>
|
||||
<pattern>http://beta.detailview.co.kr/</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.brightcove.com" desc="Brightcove">
|
||||
<pattern>http://c.brightcove.com/services/viewer</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.hyundai-kiamotors.com" desc="현대기아자동차">
|
||||
<pattern>http://vod.hyundai-kiamotors.com/Flash/PlayerTest/WebPlayer.swf</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.techdays.co.kr" desc="techdays">
|
||||
<pattern>http://www.techdays.co.kr/2010spring/remix10/ClientBin/MediaPlayerTemplate.xap</pattern>
|
||||
<pattern>http://www.microsoft.com</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.wowplan.co.kr" desc="와우플랜">
|
||||
<pattern>http://www.wowplan.co.kr/schedule/bin-debug/scheduleBlogPost.swf</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.allblet.net" desc="allblet.net">
|
||||
<pattern>http://showman.allblet.net/abp.swf</pattern>
|
||||
</domain>
|
||||
<domain name="http://dotsub.com" desc="dotsub.com">
|
||||
<pattern>http://dotsub.com/static/players/portalplayer.swf</pattern>
|
||||
</domain>
|
||||
<domain name="" isNHN="true" desc="socialsearch">
|
||||
<pattern>http://static.campaign.naver.com/0/campaign/2010/10/socialsearch/swf/</pattern>
|
||||
</domain>
|
||||
</embed>
|
||||
<iframe>
|
||||
<domain name="http://www.youtube.com" desc="유튜브 동영상" mobile="true">
|
||||
<pattern>http://www.youtube.com/</pattern>
|
||||
<pattern>https://www.youtube.com/</pattern>
|
||||
<pattern>http://www.youtube-nocookie.com/</pattern>
|
||||
<pattern>https://www.youtube-nocookie.com/</pattern>
|
||||
</domain>
|
||||
<domain name="http://maps.google.com" desc="구글맵스" mobile="true">
|
||||
<pattern>http://maps.google.com/</pattern>
|
||||
<pattern>http://maps.google.co.kr/</pattern>
|
||||
</domain>
|
||||
<domain name="http://flvs.daum.net" desc="다음 TV 팟 동영상" mobile="false">
|
||||
<pattern>http://flvs.daum.net/</pattern>
|
||||
</domain>
|
||||
<domain name="http://play.pullbbang.com" desc="풀빵 동영상" mobile="false">
|
||||
<pattern>http://play.pullbbang.com/#.swf</pattern>
|
||||
</domain>
|
||||
<domain name="" desc="게임스팟 동영상">
|
||||
<pattern>http://www.gamespot.com</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.sbs.co.kr" desc="SBS">
|
||||
<pattern>http://sbsplayer.sbs.co.kr/</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.techdays.co.kr" desc="techdays">
|
||||
<pattern>http://www.techdays.co.kr/2010spring/remix10/ClientBin/MediaPlayerTemplate.xap</pattern>
|
||||
<pattern>http://www.microsoft.com</pattern>
|
||||
</domain>
|
||||
<domain name="http://dotsub.com" desc="dotsub.com">
|
||||
<pattern>http://dotsub.com/</pattern>
|
||||
</domain>
|
||||
<domain name="http://www.travelro.co.kr" desc="트래블로">
|
||||
<pattern>http://www.travelro.co.kr/</pattern>
|
||||
</domain>
|
||||
</iframe>
|
||||
</whiteurl>
|
||||
4
classes/security/phphtmlparser/CVS/Entries
Executable file
4
classes/security/phphtmlparser/CVS/Entries
Executable file
|
|
@ -0,0 +1,4 @@
|
|||
/LICENSE/1.3/Tue Apr 13 21:22:43 2004//
|
||||
/README/1.1.1.1/Mon Oct 6 19:17:41 2003//
|
||||
/release.sh/1.1.1.1/Mon Oct 6 19:17:41 2003//
|
||||
D/src////
|
||||
4
classes/security/phphtmlparser/CVS/Entries.Extra
Executable file
4
classes/security/phphtmlparser/CVS/Entries.Extra
Executable file
|
|
@ -0,0 +1,4 @@
|
|||
/LICENSE////
|
||||
/README////
|
||||
/release.sh////
|
||||
D/src////
|
||||
3
classes/security/phphtmlparser/CVS/Entries.Extra.Old
Executable file
3
classes/security/phphtmlparser/CVS/Entries.Extra.Old
Executable file
|
|
@ -0,0 +1,3 @@
|
|||
/LICENSE////
|
||||
/README////
|
||||
/release.sh////
|
||||
4
classes/security/phphtmlparser/CVS/Entries.Old
Executable file
4
classes/security/phphtmlparser/CVS/Entries.Old
Executable file
|
|
@ -0,0 +1,4 @@
|
|||
/LICENSE/1.3/Tue Apr 13 21:22:43 2004//
|
||||
/README/1.1.1.1/Mon Oct 6 19:17:41 2003//
|
||||
/release.sh/1.1.1.1/Mon Oct 6 19:17:41 2003//
|
||||
D
|
||||
1
classes/security/phphtmlparser/CVS/Repository
Executable file
1
classes/security/phphtmlparser/CVS/Repository
Executable file
|
|
@ -0,0 +1 @@
|
|||
phphtmlparser
|
||||
1
classes/security/phphtmlparser/CVS/Root
Executable file
1
classes/security/phphtmlparser/CVS/Root
Executable file
|
|
@ -0,0 +1 @@
|
|||
:ext:jhsolorz@cvs.sourceforge.net:/cvsroot/php-html
|
||||
48
classes/security/phphtmlparser/LICENSE
Executable file
48
classes/security/phphtmlparser/LICENSE
Executable file
|
|
@ -0,0 +1,48 @@
|
|||
/* ====================================================================
|
||||
* Based on The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 2003 Jose Solorzano. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. The end-user documentation included with the redistribution,
|
||||
* if any, must include the following acknowledgment:
|
||||
* "This product includes software developed by
|
||||
* Jose Solorzano."
|
||||
* Alternately, this acknowledgment may appear in the software itself,
|
||||
* if and wherever such third-party acknowledgments normally appear.
|
||||
*
|
||||
* 4. The name "Jose Solorzano" must not be used to endorse or promote
|
||||
* products derived from this software without prior written
|
||||
* permission.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "Jose Solorzano",
|
||||
* nor may "Jose Solorzano" appear in their name, without prior written
|
||||
* permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL STARNETSYS, LLC. OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
*/
|
||||
|
||||
70
classes/security/phphtmlparser/README
Executable file
70
classes/security/phphtmlparser/README
Executable file
|
|
@ -0,0 +1,70 @@
|
|||
|
||||
HTML Parser for PHP 4
|
||||
---------------------
|
||||
|
||||
This is a simple PHP based HTML parser.
|
||||
|
||||
How to Use
|
||||
----------
|
||||
|
||||
Simply copy src/htmlparser.inc to a location in your
|
||||
codebase where you'd like to be able to include it.
|
||||
The PHP file which uses the parser might look like
|
||||
this:
|
||||
|
||||
<?php
|
||||
include ("htmlparser.inc");
|
||||
|
||||
$htmlText = "... HTML text here ...";
|
||||
$parser = new HtmlParser ($htmlText);
|
||||
while ($parser->parse()) {
|
||||
|
||||
// Data you can use here:
|
||||
//
|
||||
// $parser->iNodeType
|
||||
// $parser->iNodeName
|
||||
// $parser->iNodeValue
|
||||
// $parser->iNodeAttributes
|
||||
|
||||
}
|
||||
|
||||
You will find additional documentation for each
|
||||
field in the source code.
|
||||
|
||||
?>
|
||||
|
||||
|
||||
Files of Interest
|
||||
-----------------
|
||||
|
||||
src/htmlparser.inc -- Has HtmlParser class
|
||||
src/html2text.inc -- Has Html2Text class
|
||||
src/ex_dumptags.php -- Example: Dumps HTML nodes from test HTML string
|
||||
src/ex_html2text.php -- Example: Dumps text for test HTML string
|
||||
|
||||
Running examples offline
|
||||
------------------------
|
||||
|
||||
On a Unix system, you can run the examples as follows
|
||||
if you have PHP installed:
|
||||
|
||||
cd src
|
||||
php < ex_dumptags.php
|
||||
php < ex_html2text.php
|
||||
|
||||
|
||||
License
|
||||
-------
|
||||
|
||||
This is an open source project. The license is
|
||||
based on the Apache Software License. See the file
|
||||
named LICENSE.
|
||||
|
||||
Author
|
||||
------
|
||||
|
||||
The parser and the HTML-to-text example were written
|
||||
by Jose Solorzano of Starnetsys, LLC. If you need a
|
||||
program or website developed, professionally, within
|
||||
budget, and on time, contact us (http://starnetsys.com)
|
||||
|
||||
10
classes/security/phphtmlparser/release.sh
Executable file
10
classes/security/phphtmlparser/release.sh
Executable file
|
|
@ -0,0 +1,10 @@
|
|||
#!/bin/sh
# Packages the phphtmlparser directory into $TEMP/phphtmlparser<suffix>.tar.gz.
# The optional first argument ($1) is appended to the archive base name.
# The tar step changes to the parent directory and archives "phphtmlparser",
# so the script is expected to be run from inside that directory.

# Fall back to /tmp when TEMP is unset or empty.
if [ "$TEMP" = "" ]; then
    TEMP="/tmp";
fi

# Remove editor backup files. Letting find run rm itself keeps file names
# containing whitespace intact and is a no-op when nothing matches.
find . -name '*~' -exec rm -f {} +

# Quote the expansions so a TEMP path or suffix containing spaces still works.
(cd ..; tar cvf "$TEMP/phphtmlparser$1.tar" phphtmlparser)

gzip "$TEMP/phphtmlparser$1.tar"
|
||||
7
classes/security/phphtmlparser/src/CVS/Entries
Executable file
7
classes/security/phphtmlparser/src/CVS/Entries
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
/ex_dumptags.php/1.2/Wed Oct 29 16:42:53 2003//
|
||||
/ex_dumpurl.php/1.3/Fri Apr 16 15:52:39 2004//
|
||||
/ex_html2text.php/1.2/Fri Apr 16 15:52:39 2004//
|
||||
/html2text.inc/1.3/Wed Oct 29 16:42:53 2003//
|
||||
/htmlparser.inc/1.5/Fri Apr 16 15:52:39 2004//
|
||||
/testfile.html/1.2/Fri Apr 16 15:52:39 2004//
|
||||
D
|
||||
6
classes/security/phphtmlparser/src/CVS/Entries.Extra
Executable file
6
classes/security/phphtmlparser/src/CVS/Entries.Extra
Executable file
|
|
@ -0,0 +1,6 @@
|
|||
/ex_dumptags.php////
|
||||
/ex_dumpurl.php////
|
||||
/ex_html2text.php////
|
||||
/html2text.inc////
|
||||
/htmlparser.inc////
|
||||
/testfile.html////
|
||||
0
classes/security/phphtmlparser/src/CVS/Entries.Extra.Old
Executable file
0
classes/security/phphtmlparser/src/CVS/Entries.Extra.Old
Executable file
0
classes/security/phphtmlparser/src/CVS/Entries.Old
Executable file
0
classes/security/phphtmlparser/src/CVS/Entries.Old
Executable file
1
classes/security/phphtmlparser/src/CVS/Repository
Executable file
1
classes/security/phphtmlparser/src/CVS/Repository
Executable file
|
|
@ -0,0 +1 @@
|
|||
phphtmlparser/src
|
||||
1
classes/security/phphtmlparser/src/CVS/Root
Executable file
1
classes/security/phphtmlparser/src/CVS/Root
Executable file
|
|
@ -0,0 +1 @@
|
|||
:ext:jhsolorz@cvs.sourceforge.net:/cvsroot/php-html
|
||||
11
classes/security/phphtmlparser/src/ex_dumptags.php
Executable file
11
classes/security/phphtmlparser/src/ex_dumptags.php
Executable file
|
|
@ -0,0 +1,11 @@
|
|||
<?php
// Example:
// Dumps the type, name and value of every node found in a small
// hard-coded HTML string.
// The full "<?php" open tag is used so the script is still parsed as PHP
// when short_open_tag is disabled.

include ("htmlparser.inc");

$htmlText = "<html><!-- comment --><body>This is the body</body></html>";
$parser = new HtmlParser($htmlText);

// parse() advances to the next node and returns false once the text is exhausted.
while ($parser->parse()) {
    echo "-----------------------------------\r\n";
    echo "Node type: " . $parser->iNodeType . "\r\n";
    echo "Node name: " . $parser->iNodeName . "\r\n";
    echo "Node value: " . $parser->iNodeValue . "\r\n";
}
?>
|
||||
29
classes/security/phphtmlparser/src/ex_dumpurl.php
Executable file
29
classes/security/phphtmlparser/src/ex_dumpurl.php
Executable file
|
|
@ -0,0 +1,29 @@
|
|||
<?php
// Example:
// Dumps nodes from testfile.html.
// To run: php < ex_dumpurl.php
// The full "<?php" open tag is used so the script is still parsed as PHP
// when short_open_tag is disabled.

include ("htmlparser.inc");

$parser = HtmlParser_ForFile ("testfile.html");
//$parser = HtmlParser_ForURL ("http://yahoo.com");

while ($parser->parse()) {
    echo "-----------------------------------\r\n";
    echo "Name=" . $parser->iNodeName . ";";
    echo "Type=" . $parser->iNodeType . ";";

    // Only text and comment nodes carry a value worth printing.
    if ($parser->iNodeType == NODE_TYPE_TEXT || $parser->iNodeType == NODE_TYPE_COMMENT) {
        echo "Value='" . $parser->iNodeValue . "'";
    }
    echo "\r\n";

    // For elements, list every attribute as name="value".
    if ($parser->iNodeType == NODE_TYPE_ELEMENT) {
        echo "ATTRIBUTES: ";
        foreach ($parser->iNodeAttributes as $name => $value) {
            echo $name . "=\"" . $value . "\" ";
        }
    }
    echo "\r\n";
}
?>
|
||||
18
classes/security/phphtmlparser/src/ex_html2text.php
Executable file
18
classes/security/phphtmlparser/src/ex_html2text.php
Executable file
|
|
@ -0,0 +1,18 @@
|
|||
<?php
// Example: html2text
// Converts HTML to formatted ASCII text.
// Run with: php < ex_html2text.php
// The full "<?php" open tag is used so the script is still parsed as PHP
// when short_open_tag is disabled.

include ("html2text.inc");

$htmlText = "Html2text is a tool that allows you to<br>" .
            "convert HTML to text.<p>" .
            "Does it work?";

// Second argument is the maximum number of characters per output line.
$htmlToText = new Html2Text ($htmlText, 15);
$text = $htmlToText->convert();
echo "Conversion follows:\r\n";
echo "-------------------\r\n";
echo $text;

?>
|
||||
214
classes/security/phphtmlparser/src/html2text.inc
Executable file
214
classes/security/phphtmlparser/src/html2text.inc
Executable file
|
|
@ -0,0 +1,214 @@
|
|||
<?php

/*
 * Copyright (c) 2003 Jose Solorzano. All rights reserved.
 * Redistribution of source must retain this copyright notice.
 */

// include_once: the parser file declares a class and constants, so loading it
// a second time (e.g. when the caller already included it) must be a no-op.
include_once ("htmlparser.inc");
|
||||
|
||||
/**
|
||||
* Class Html2Text. (HtmlParser example.)
|
||||
* Converts HTML to ASCII attempting to preserve
|
||||
* document structure.
|
||||
* To use, create an instance of Html2Text passing
|
||||
* the text to convert and the desired maximum
|
||||
* number of characters per line. Then invoke
|
||||
* convert() which returns ASCII text.
|
||||
*/
|
||||
class Html2Text {

    // Private fields

    /** Text accumulated so far for the output line being built. */
    var $iCurrentLine = "";
    /** Pending word (string) or TOKEN_* marker (int) not yet placed on a line. */
    var $iCurrentWord = "";
    /** Words of the text node currently being consumed. */
    var $iCurrentWordArray;
    /** Index of the next word to hand out from $iCurrentWordArray. */
    var $iCurrentWordIndex;
    /** True between a <script> start tag and its end tag; output is suppressed. */
    var $iInScript = false;
    /** True while the current text node has already been split into words. */
    var $iInText = false;
    /** Current nesting depth of <ul>/<ol> lists. */
    var $iListLevel = 0;
    var $iHtmlText;
    var $iMaxColumns;
    var $iHtmlParser;

    // Constants (integers, so they can never be confused with word strings
    // under the strict === comparisons used below)

    var $TOKEN_BR = 0;
    var $TOKEN_P = 1;
    var $TOKEN_LI = 2;
    var $TOKEN_AFTERLI = 3;
    var $TOKEN_UL = 4;
    var $TOKEN_ENDUL = 5;

    /**
     * Constructor.
     *
     * @param string $aHtmlText   HTML text to convert
     * @param int    $aMaxColumns desired maximum number of characters per line
     */
    function __construct ($aHtmlText, $aMaxColumns) {
        $this->iHtmlText = $aHtmlText;
        $this->iMaxColumns = $aMaxColumns;
    }

    /**
     * PHP 4 style constructor, kept so existing callers and PHP 4 keep working.
     * Same-named methods are no longer treated as constructors by PHP 8,
     * hence the delegation to __construct().
     */
    function Html2Text ($aHtmlText, $aMaxColumns) {
        $this->__construct($aHtmlText, $aMaxColumns);
    }

    /**
     * Converts the HTML text given to the constructor.
     *
     * @return string the output lines, each terminated by "\r\n"
     */
    function convert() {
        $this->iHtmlParser = new HtmlParser($this->iHtmlText);
        $wholeText = "";
        while (($line = $this->getLine()) !== false) {
            $wholeText .= ($line . "\r\n");
        }
        return $wholeText;
    }

    /**
     * Builds the next output line.
     *
     * @return string|false the line, or false once the parser is exhausted
     *                      and no text is pending
     */
    function getLine() {
        while (true) {
            // A false result means the line is complete: either a break token
            // was met or the pending word did not fit. In the latter case the
            // word stays in $iCurrentWord and is retried on the next call.
            if (!$this->addWordToLine($this->iCurrentWord)) {
                $retvalue = $this->iCurrentLine;
                $this->iCurrentLine = "";
                return $retvalue;
            }
            $word = $this->getWord();
            if ($word === false) {
                if ($this->iCurrentLine == "") {
                    break;
                }
                // Flush the last, partially filled line.
                $retvalue = $this->iCurrentLine;
                $this->iCurrentLine = "";
                $this->iInText = false;
                $this->iCurrentWord = "";
                return $retvalue;
            }
        }
        return false;
    }

    /**
     * Tries to append a word or TOKEN_* marker to the current line.
     *
     * @param string|int $word word text, or one of the TOKEN_* markers
     * @return bool true when the word was consumed; false when the current
     *              line must be emitted first
     */
    function addWordToLine ($word) {
        if ($this->iInScript) {
            // Script content never reaches the output.
            return true;
        }
        if ($word === $this->TOKEN_BR) {
            $this->iCurrentWord = "";
            return false;
        }
        // Paragraph and list boundaries end the current line and queue a
        // further line break.
        if ($word === $this->TOKEN_P
            || $word === $this->TOKEN_UL
            || $word === $this->TOKEN_ENDUL) {
            $this->iCurrentWord = $this->TOKEN_BR;
            return false;
        }
        if ($word === $this->TOKEN_LI) {
            // End the current line; the next one starts with the list marker.
            $this->iCurrentWord = $this->TOKEN_AFTERLI;
            return false;
        }

        $isListMarker = ($word === $this->TOKEN_AFTERLI);
        $toAdd = $isListMarker ? "" : $word;

        $lineHasContent = ($this->iCurrentLine != "");
        if ($lineHasContent) {
            $prefix = $this->iCurrentLine . " ";
        }
        else {
            $prefix = $this->getIndentation($isListMarker);
        }
        $candidateLine = $prefix . $toAdd;

        // Only wrap when the line already holds something. A word that is too
        // long for an empty (merely indented) line must still be placed,
        // otherwise it would be rejected again on every following line and
        // convert() would never terminate.
        if ($lineHasContent && strlen ($candidateLine) > $this->iMaxColumns) {
            return false;
        }
        $this->iCurrentLine = $candidateLine;
        return true;
    }

    /**
     * Fetches the next word or layout token from the parser.
     *
     * @return string|int|false a word, "" for script tags, a TOKEN_* marker,
     *                          or false at the end of the input
     */
    function getWord() {
        while (true) {
            if ($this->iHtmlParser->iNodeType == NODE_TYPE_TEXT) {
                if (!$this->iInText) {
                    // First visit of this text node: split it into words.
                    $words = $this->splitWords($this->iHtmlParser->iNodeValue);
                    $this->iCurrentWordArray = $words;
                    $this->iCurrentWordIndex = 0;
                    $this->iInText = true;
                }
                if ($this->iCurrentWordIndex < count($this->iCurrentWordArray)) {
                    $this->iCurrentWord = $this->iCurrentWordArray[$this->iCurrentWordIndex++];
                    return $this->iCurrentWord;
                }
                else {
                    $this->iInText = false;
                }
            }
            else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ELEMENT) {
                if (strcasecmp ($this->iHtmlParser->iNodeName, "br") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = $this->TOKEN_BR;
                    return $this->iCurrentWord;
                }
                else if (strcasecmp ($this->iHtmlParser->iNodeName, "p") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = $this->TOKEN_P;
                    return $this->iCurrentWord;
                }
                else if (strcasecmp ($this->iHtmlParser->iNodeName, "script") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = "";
                    $this->iInScript = true;
                    return $this->iCurrentWord;
                }
                else if (strcasecmp ($this->iHtmlParser->iNodeName, "ul") == 0
                         || strcasecmp ($this->iHtmlParser->iNodeName, "ol") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = $this->TOKEN_UL;
                    $this->iListLevel++;
                    return $this->iCurrentWord;
                }
                else if (strcasecmp ($this->iHtmlParser->iNodeName, "li") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = $this->TOKEN_LI;
                    return $this->iCurrentWord;
                }
            }
            else if ($this->iHtmlParser->iNodeType == NODE_TYPE_ENDELEMENT) {
                if (strcasecmp ($this->iHtmlParser->iNodeName, "script") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = "";
                    $this->iInScript = false;
                    return $this->iCurrentWord;
                }
                else if (strcasecmp ($this->iHtmlParser->iNodeName, "ul") == 0
                         || strcasecmp ($this->iHtmlParser->iNodeName, "ol") == 0) {
                    $this->iHtmlParser->parse();
                    $this->iCurrentWord = $this->TOKEN_ENDUL;
                    if ($this->iListLevel > 0) {
                        $this->iListLevel--;
                    }
                    return $this->iCurrentWord;
                }
            }
            // Nothing to report for this node: advance, stopping at end of input.
            if (!$this->iHtmlParser->parse()) {
                break;
            }
        }
        return false;
    }

    /**
     * Splits text on runs of blanks, tabs and line breaks.
     *
     * @param string $text text of a single text node
     * @return array the words, each passed through htmlDecode()
     */
    function splitWords ($text) {
        // preg_split replaces the POSIX-regex split(), which was removed in PHP 7.
        $words = preg_split ('/[ \t\r\n]+/', $text);
        for ($idx = 0; $idx < count($words); $idx++) {
            $words[$idx] = $this->htmlDecode($words[$idx]);
        }
        return $words;
    }

    /**
     * Placeholder for entity decoding; currently returns the text unchanged.
     */
    function htmlDecode ($text) {
        // TBD
        return $text;
    }

    /**
     * Returns the prefix for a new line at the current list nesting level.
     *
     * @param bool $hasLI true when the line starts a list item and therefore
     *                    gets the "- " marker
     * @return string the indentation, empty outside of lists
     */
    function getIndentation ($hasLI) {
        $indent = "";
        for ($idx = 0; $idx < ($this->iListLevel - 1); $idx++) {
            $indent .= " ";
        }
        if ($this->iListLevel > 0) {
            $indent = $hasLI ? ($indent . "- ") : ($indent . " ");
        }
        return $indent;
    }
}
|
||||
365
classes/security/phphtmlparser/src/htmlparser.inc
Executable file
365
classes/security/phphtmlparser/src/htmlparser.inc
Executable file
|
|
@ -0,0 +1,365 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* Copyright (c) 2003 Jose Solorzano. All rights reserved.
|
||||
* Redistribution of source must retain this copyright notice.
|
||||
*
|
||||
* Jose Solorzano (http://jexpert.us) is a software consultant.
|
||||
*
|
||||
* Contributions by:
|
||||
* - Leo West (performance improvements)
|
||||
*/
|
||||
|
||||
// Node type codes; HtmlParser::$iNodeType holds one of these.
define('NODE_TYPE_START', 0);
define('NODE_TYPE_ELEMENT', 1);     // start tag
define('NODE_TYPE_ENDELEMENT', 2);  // end tag ("</name>")
define('NODE_TYPE_TEXT', 3);        // text between tags
define('NODE_TYPE_COMMENT', 4);     // "<!-- ... -->"
define('NODE_TYPE_DONE', 5);        // no further tag to read
|
||||
|
||||
/**
|
||||
* Class HtmlParser.
|
||||
* To use, create an instance of the class passing
|
||||
* HTML text. Then invoke parse() until it's false.
|
||||
* When parse() returns true, $iNodeType, $iNodeName
|
||||
* $iNodeValue and $iNodeAttributes are updated.
|
||||
*
|
||||
* To create an HtmlParser instance you may also
|
||||
* use convenience functions HtmlParser_ForFile
|
||||
* and HtmlParser_ForURL.
|
||||
*/
|
||||
class HtmlParser {
|
||||
|
||||
/**
|
||||
* Field iNodeType.
|
||||
* May be one of the NODE_TYPE_* constants above.
|
||||
*/
|
||||
var $iNodeType;
|
||||
|
||||
/**
|
||||
* Field iNodeName.
|
||||
* For elements, it's the name of the element.
|
||||
*/
|
||||
var $iNodeName = "";
|
||||
|
||||
/**
|
||||
* Field iNodeValue.
|
||||
* For text nodes, it's the text.
|
||||
*/
|
||||
var $iNodeValue = "";
|
||||
|
||||
/**
|
||||
* Field iNodeAttributes.
|
||||
* A string-indexed array containing attribute values
|
||||
* of the current node. Indexes are always lowercase.
|
||||
*/
|
||||
var $iNodeAttributes;
|
||||
|
||||
// The following fields should be
|
||||
// considered private:
|
||||
|
||||
var $iHtmlText;
|
||||
var $iHtmlTextLength;
|
||||
var $iHtmlTextIndex = 0;
|
||||
var $iHtmlCurrentChar;
|
||||
var $BOE_ARRAY;
|
||||
var $B_ARRAY;
|
||||
var $BOS_ARRAY;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
* Constructs an HtmlParser instance with
|
||||
* the HTML text given.
|
||||
*/
|
||||
function __construct ($aHtmlText) {
    $this->iHtmlText = $aHtmlText;
    $this->iHtmlTextLength = strlen($aHtmlText);
    $this->iNodeAttributes = array();
    $this->setTextIndex (0);

    // Character sets used while scanning inside a tag:
    // blanks plus "=", blanks only, blanks plus "/".
    $this->BOE_ARRAY = array (" ", "\t", "\r", "\n", "=" );
    $this->B_ARRAY = array (" ", "\t", "\r", "\n" );
    $this->BOS_ARRAY = array (" ", "\t", "\r", "\n", "/" );
}

/**
 * PHP 4 style constructor, kept so existing callers and PHP 4 keep working.
 * Same-named methods are no longer treated as constructors by PHP 8,
 * hence the delegation to __construct().
 */
function HtmlParser ($aHtmlText) {
    $this->__construct($aHtmlText);
}
|
||||
|
||||
/**
|
||||
* Method parse.
|
||||
* Parses the next node. Returns false only if
|
||||
* the end of the HTML text has been reached.
|
||||
* Updates values of iNode* fields.
|
||||
*/
|
||||
function parse() {
    // Any text found before the next tag is reported as a text node of its own.
    $leadingText = $this->skipToElement();
    if ($leadingText == "") {
        // No text in front of the tag: let readTag() report the node
        // (or signal the end of the input).
        return $this->readTag();
    }
    $this->iNodeType = NODE_TYPE_TEXT;
    $this->iNodeName = "Text";
    $this->iNodeValue = $leadingText;
    return true;
}
|
||||
|
||||
function clearAttributes() {
|
||||
$this->iNodeAttributes = array();
|
||||
}
|
||||
|
||||
function readTag() {
|
||||
if ($this->iCurrentChar != "<") {
|
||||
$this->iNodeType = NODE_TYPE_DONE;
|
||||
return false;
|
||||
}
|
||||
$this->clearAttributes();
|
||||
$this->skipMaxInTag ("<", 1);
|
||||
if ($this->iCurrentChar == '/') {
|
||||
$this->moveNext();
|
||||
$name = $this->skipToBlanksInTag();
|
||||
$this->iNodeType = NODE_TYPE_ENDELEMENT;
|
||||
$this->iNodeName = $name;
|
||||
$this->iNodeValue = "";
|
||||
$this->skipEndOfTag();
|
||||
return true;
|
||||
}
|
||||
$name = $this->skipToBlanksOrSlashInTag();
|
||||
if (!$this->isValidTagIdentifier ($name)) {
|
||||
$comment = false;
|
||||
if (strpos($name, "!--") === 0) {
|
||||
$ppos = strpos($name, "--", 3);
|
||||
if (strpos($name, "--", 3) === (strlen($name) - 2)) {
|
||||
$this->iNodeType = NODE_TYPE_COMMENT;
|
||||
$this->iNodeName = "Comment";
|
||||
$this->iNodeValue = "<" . $name . ">";
|
||||
$comment = true;
|
||||
}
|
||||
else {
|
||||
$rest = $this->skipToStringInTag ("-->");
|
||||
if ($rest != "") {
|
||||
$this->iNodeType = NODE_TYPE_COMMENT;
|
||||
$this->iNodeName = "Comment";
|
||||
$this->iNodeValue = "<" . $name . $rest;
|
||||
$comment = true;
|
||||
// Already skipped end of tag
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!$comment) {
|
||||
$this->iNodeType = NODE_TYPE_TEXT;
|
||||
$this->iNodeName = "Text";
|
||||
$this->iNodeValue = "<" . $name;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
$this->iNodeType = NODE_TYPE_ELEMENT;
|
||||
$this->iNodeValue = "";
|
||||
$this->iNodeName = $name;
|
||||
while ($this->skipBlanksInTag()) {
|
||||
$attrName = $this->skipToBlanksOrEqualsInTag();
|
||||
if ($attrName != "" && $attrName != "/") {
|
||||
$this->skipBlanksInTag();
|
||||
if ($this->iCurrentChar == "=") {
|
||||
$this->skipEqualsInTag();
|
||||
$this->skipBlanksInTag();
|
||||
$value = $this->readValueInTag();
|
||||
$this->iNodeAttributes[strtolower($attrName)] = $value;
|
||||
}
|
||||
else {
|
||||
$this->iNodeAttributes[strtolower($attrName)] = "";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
$this->skipEndOfTag();
|
||||
return true;
|
||||
}
|
||||
|
||||
function isValidTagIdentifier ($name) {
|
||||
return ereg ("^[A-Za-z0-9_\\-]+$", $name);
|
||||
}
|
||||
|
||||
function skipBlanksInTag() {
|
||||
return "" != ($this->skipInTag ($this->B_ARRAY));
|
||||
}
|
||||
|
||||
function skipToBlanksOrEqualsInTag() {
|
||||
return $this->skipToInTag ($this->BOE_ARRAY);
|
||||
}
|
||||
|
||||
function skipToBlanksInTag() {
|
||||
return $this->skipToInTag ($this->B_ARRAY);
|
||||
}
|
||||
|
||||
function skipToBlanksOrSlashInTag() {
|
||||
return $this->skipToInTag ($this->BOS_ARRAY);
|
||||
}
|
||||
|
||||
function skipEqualsInTag() {
|
||||
return $this->skipMaxInTag ("=", 1);
|
||||
}
|
||||
|
||||
function readValueInTag() {
|
||||
$ch = $this->iCurrentChar;
|
||||
$value = "";
|
||||
if ($ch == "\"") {
|
||||
$this->skipMaxInTag ("\"", 1);
|
||||
$value = $this->skipToInTag ("\"");
|
||||
$this->skipMaxInTag ("\"", 1);
|
||||
}
|
||||
else if ($ch == "'") {
|
||||
$this->skipMaxInTag ("'", 1);
|
||||
$value = $this->skipToInTag ("'");
|
||||
$this->skipMaxInTag ("'", 1);
|
||||
}
|
||||
else {
|
||||
$value = $this->skipToBlanksInTag();
|
||||
}
|
||||
return $value;
|
||||
}
|
||||
|
||||
function setTextIndex ($index) {
|
||||
$this->iHtmlTextIndex = $index;
|
||||
if ($index >= $this->iHtmlTextLength) {
|
||||
$this->iCurrentChar = -1;
|
||||
}
|
||||
else {
|
||||
$this->iCurrentChar = $this->iHtmlText{$index};
|
||||
}
|
||||
}
|
||||
|
||||
function moveNext() {
|
||||
if ($this->iHtmlTextIndex < $this->iHtmlTextLength) {
|
||||
$this->setTextIndex ($this->iHtmlTextIndex + 1);
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
function skipEndOfTag() {
|
||||
while (($ch = $this->iCurrentChar) !== -1) {
|
||||
if ($ch == ">") {
|
||||
$this->moveNext();
|
||||
return;
|
||||
}
|
||||
$this->moveNext();
|
||||
}
|
||||
}
|
||||
|
||||
function skipInTag ($chars) {
|
||||
$sb = "";
|
||||
while (($ch = $this->iCurrentChar) !== -1) {
|
||||
if ($ch == ">") {
|
||||
return $sb;
|
||||
} else {
|
||||
$match = false;
|
||||
for ($idx = 0; $idx < count($chars); $idx++) {
|
||||
if ($ch == $chars[$idx]) {
|
||||
$match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!$match) {
|
||||
return $sb;
|
||||
}
|
||||
$sb .= $ch;
|
||||
$this->moveNext();
|
||||
}
|
||||
}
|
||||
return $sb;
|
||||
}
|
||||
|
||||
function skipMaxInTag ($chars, $maxChars) {
|
||||
$sb = "";
|
||||
$count = 0;
|
||||
while (($ch = $this->iCurrentChar) !== -1 && $count++ < $maxChars) {
|
||||
if ($ch == ">") {
|
||||
return $sb;
|
||||
} else {
|
||||
$match = false;
|
||||
for ($idx = 0; $idx < count($chars); $idx++) {
|
||||
if ($ch == $chars[$idx]) {
|
||||
$match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!$match) {
|
||||
return $sb;
|
||||
}
|
||||
$sb .= $ch;
|
||||
$this->moveNext();
|
||||
}
|
||||
}
|
||||
return $sb;
|
||||
}
|
||||
|
||||
function skipToInTag ($chars) {
|
||||
$sb = "";
|
||||
while (($ch = $this->iCurrentChar) !== -1) {
|
||||
$match = $ch == ">";
|
||||
if (!$match) {
|
||||
for ($idx = 0; $idx < count($chars); $idx++) {
|
||||
if ($ch == $chars[$idx]) {
|
||||
$match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($match) {
|
||||
return $sb;
|
||||
}
|
||||
$sb .= $ch;
|
||||
$this->moveNext();
|
||||
}
|
||||
return $sb;
|
||||
}
|
||||
|
||||
function skipToElement() {
|
||||
$sb = "";
|
||||
while (($ch = $this->iCurrentChar) !== -1) {
|
||||
if ($ch == "<") {
|
||||
return $sb;
|
||||
}
|
||||
$sb .= $ch;
|
||||
$this->moveNext();
|
||||
}
|
||||
return $sb;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns text between current position and $needle,
|
||||
* inclusive, or "" if not found. The current index is moved to a point
|
||||
* after the location of $needle, or not moved at all
|
||||
* if nothing is found.
|
||||
*/
|
||||
function skipToStringInTag ($needle) {
|
||||
$pos = strpos ($this->iHtmlText, $needle, $this->iHtmlTextIndex);
|
||||
if ($pos === false) {
|
||||
return "";
|
||||
}
|
||||
$top = $pos + strlen($needle);
|
||||
$retvalue = substr ($this->iHtmlText, $this->iHtmlTextIndex, $top - $this->iHtmlTextIndex);
|
||||
$this->setTextIndex ($top);
|
||||
return $retvalue;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Convenience factory: builds an HtmlParser for the contents of a file.
 * The file name is handed to HtmlParser_ForURL unchanged.
 *
 * @param string $fileName name of the file to read.
 * @return HtmlParser
 */
function HtmlParser_ForFile ($fileName) {
    $parser = HtmlParser_ForURL ($fileName);
    return $parser;
}
|
||||
|
||||
/**
 * Convenience factory: builds an HtmlParser for the contents found
 * at $url (anything the PHP stream wrappers can open, including
 * plain file names).
 *
 * If the source cannot be read, file_get_contents() emits its usual
 * warning and the parser is built over an empty string instead of
 * passing an invalid handle on to further stream calls.
 *
 * @param string $url URL or file name to read.
 * @return HtmlParser
 */
function HtmlParser_ForURL ($url) {
    $content = file_get_contents ($url);
    if ($content === false) {
        // Unreadable source: fall back to an empty document.
        $content = "";
    }
    return new HtmlParser ($content);
}
|
||||
|
||||
php?>
|
||||
8
classes/security/phphtmlparser/src/testfile.html
Executable file
8
classes/security/phphtmlparser/src/testfile.html
Executable file
|
|
@ -0,0 +1,8 @@
|
|||
<!-- first comment --> <!-- second comment -->
|
||||
<elem attribute1="foobar" attribute2=""/>Text After Elem
|
||||
<!--comment1-->
|
||||
<elem2>Text</elem2>
|
||||
<!-- comment2-->
|
||||
<elem3 attribute3='insinglequotes'/>
|
||||
<!--comment3 -->Text between comments<!-- comment4 -->
|
||||
<elem4/>
|
||||
Loading…
Add table
Add a link
Reference in a new issue