PDA

View Full Version : parsing HTML and sending hrefs to Flash


zzcamarinha
03-27-2004, 11:10 AM
My project is to let the user enter a URL, after which one movie clip (MC) will be created. Then my program would find all the hrefs (links) in the target URL and send them to Flash, where a new MC would be created for each one of them, so that it works like a web browser.

I have this code for an HTML parser and an example file, and what I get is the complete HTML of the target URL, but I need to change it in order to get only the hrefs (links) into a list of variables (XML) so that Flash can read them. Can anybody help me?

here is the code:

#

class html_parser{

# Accumulated output markup; the constructor and html_parse() append to it.
var $content;
# Flag stored from the constructor's $linecount argument; when truthy the
# output is wrapped in an <ol> and each parsed tag/comment/doctype in an <li>.
var $linecount;

/**
 * Reads $file and appends a syntax-coloured rendering of its HTML to
 * $this->content.
 *
 * Declared before html_parser() on purpose: PHP 5+ uses __construct() as the
 * constructor, and PHP 8 no longer treats a method named after the class as
 * one, so without this method `new html_parser(...)` would do nothing there.
 *
 * @param string $file      Path or URL passed to fopen().
 * @param mixed  $linecount Truthy to wrap the output in a numbered list.
 */
function __construct($file, $linecount=0){
    $this->content .= '<style>font, ol, li { font-family: "Courier New", Courier; font-size: 12px; } </style>';
    $this->linecount = $linecount;
    if($this->linecount) $this->content .= '<ol type="1">';

    # No error suppression: if the source cannot be opened PHP's own warning
    # says why, and the read loop below is simply skipped.
    $id = fopen($file,"r");

    if($id !== false){
        # Collect the whole document before parsing. Parsing each 4096-byte
        # chunk on its own mangles any tag that straddles a chunk boundary.
        $html = '';
        while(!feof($id)){
            $data = fread($id, 4096);
            # Stop on read error or an empty read so a stalled stream cannot
            # spin forever; a chunk that is just "0" is still kept.
            if($data === false or $data === '') break;
            $html .= $data;
        }
        fclose($id);

        if($html !== '') $this->html_parse($html);
    }

    if($this->linecount) $this->content .= '</ol>';

}

/**
 * PHP 4-style constructor, kept so that PHP 4 (where this is the real
 * constructor) and any code calling it explicitly continue to work.
 *
 * @param string $file      Path or URL passed to fopen().
 * @param mixed  $linecount Truthy to wrap the output in a numbered list.
 */
function html_parser($file, $linecount=0){
    $this->__construct($file, $linecount);
}

/**
 * Walks $input tag by tag and appends the coloured rendering of every
 * comment, doctype/declaration, tag and text run to $this->content.
 *
 * Differences from a naive first-occurrence scan:
 *  - "!--", "!" and ">" are looked for relative to the current "<", not
 *    anywhere in the input, so stray "!" or ">" characters in text do not
 *    derail the parse;
 *  - text that precedes a comment or doctype is emitted instead of dropped;
 *  - whatever cannot be parsed (no "<" left, or an unterminated construct)
 *    is emitted through html_text() so it is escaped rather than injected
 *    raw into the page.
 *
 * @param string $input HTML source to render.
 */
function html_parse($input){

    $input = (string) $input;

    while(true){
        $start = strpos($input,"<");
        if($start === false) break;

        if(substr($input,$start+1,3) === "!--"){
            # Comment: ends at the first "-->" after the opening "<!--".
            $end = strpos($input,"-->",$start+4);
            if($end === false) break;
            if($start > 0) $this->content .= $this->html_text(substr($input,0,$start));
            $comment = substr($input,$start+4,$end-$start-4);
            $this->content .= ($this->linecount ? '<li>'.$this->html_comment($comment).'</li>' : $this->html_comment($comment) );
            $input = (string) substr($input,$end+3);
        }elseif(substr($input,$start+1,1) === "!"){
            # Declaration such as <!DOCTYPE ...>.
            $end = strpos($input,">",$start);
            if($end === false) break;
            if($start > 0) $this->content .= $this->html_text(substr($input,0,$start));
            $doctype = substr($input,$start+2,$end-$start-2);
            $this->content .= ($this->linecount ? '<li>'.$this->html_doctype($doctype).'</li>' : $this->html_doctype($doctype) );
            $input = (string) substr($input,$end+1);
        }else{
            # Ordinary opening/closing tag.
            $end = strpos($input,">",$start);
            if($end === false) break;
            $tag = trim(substr($input,$start+1,$end-$start-1));
            # The text wrapper is emitted even when empty, matching the
            # markup this branch has always produced.
            $this->content .= $this->html_text((string) substr($input,0,$start));
            $this->content .= ($this->linecount ? '<li>'.$this->htmlparse_parsetag($tag).'</li>' : $this->htmlparse_parsetag($tag) );
            $input = (string) substr($input,$end+1);
        }
    }

    # Remainder: trailing text or an unterminated tag/comment. Escape it.
    if($input !== '') $this->content .= $this->html_text($input);

}

/**
 * Renders the inside of one tag (the text between "<" and ">").
 *
 * A trailing "/" (self-closing form) is removed first. If the tag contains a
 * space, the part before it is the element name and the rest is handed to
 * htmlparse_parseattributes(); otherwise the tag is rendered as a closing
 * tag when it starts with "/" and as an attribute-less opening tag if not.
 *
 * @param string $tag Tag content without the angle brackets.
 * @return string Coloured markup from html_start() or html_end().
 */
function htmlparse_parsetag($tag){
    $tag = (string) $tag;

    # Drop the "/" of a self-closing tag, but leave a lone "/" alone.
    if(strlen($tag) > 1 and substr($tag,-1) === "/") $tag = substr($tag,0,strlen($tag)-1);

    $pos = strpos($tag," ");
    if($pos !== false and $pos > 0){
        $element = trim(substr($tag,0,$pos));
        $attributes = $this->htmlparse_parseattributes(trim(substr($tag,$pos)));
        return $this->html_start($element,$attributes);
    }

    $element = trim($tag);
    # Only a leading "/" marks a closing tag; a "/" elsewhere in the name
    # must not cause the first character to be chopped off.
    if(substr($element,0,1) === "/"){
        return $this->html_end((string) substr($element,1));
    }
    return $this->html_start($element);
}

# Splits the attribute part of a tag into an array of name => value.
# Names that appear without a value are stored with a null value.
# Returns the array, or nothing (null) when no attribute was recorded.
function htmlparse_parseattributes($attributes){
# Start with $output undefined so the isset() check at the end is meaningful.
unset($output);
$attribute = "";

# Each pass consumes one name=value pair; the loop runs while an "=" exists
# at an index greater than 0.
while(strpos($attributes,"=")>0){
$pos = strpos($attributes,"=");
# Everything before "=" is the name (possibly preceded by valueless names).
$attribute = trim(substr($attributes,0,$pos));
# From here on $attributes starts at the value.
$attributes = trim(substr($attributes,$pos+1));
# Find the first double and single quote; $pos2 ends up as the earlier one.
$pos2 = strpos($attributes,"\"");
$pos3 = strpos($attributes,"'");
if(!($pos3===false) and !($pos2===false) and ($pos3<$pos2)) $pos2 = $pos3;
if(!($pos3===false) and ($pos2===false) and (($pos3<$pos) or ($pos==0))) $pos2 = $pos3;

# NOTE(review): $pos is the "=" offset in the string before it was cut above,
# while $pos2 is an offset in the cut string - confirm this comparison is intended.
if(!($pos2===false) and (($pos2<$pos) or ($pos==0))){
if (substr($attributes,0,1) == "\""){
# Double-quoted value: take the text up to the next double quote.
$pos = strpos($attributes,"\"",1);
$val = substr($attributes,1,$pos-1);
}elseif (substr($attributes,0,1) == "'"){
# Single-quoted value: take the text up to the next single quote.
$pos = strpos($attributes,"'",1);
$val = substr($attributes,1,$pos-1);
}else{
# Value does not start with a quote: take the text up to the next "=" and
# cut it at its last space, leaving the following attribute name unconsumed.
$pos1 = strpos($attributes,"=",1);
$val = substr($attributes,0,$pos1);
$pos1a = strrpos($val," ");
# strlen($val) equals $pos1 here, so $pos works out to $pos1a.
$pos = $pos1-(strlen($val)-$pos1a);
$val = substr($val,0,$pos1a);
}

# Words before the final name are attributes without a value.
while (strpos($attribute," ")>0){
$pos1 = strpos($attribute," ");
$attr1 = substr($attribute,0,$pos1);
$output[$attr1] = null;
$attribute = trim(substr($attribute,$pos1+1));
}

$output[$attribute] = $val;
# Skip past the consumed value (and its closing quote or trailing space).
$attributes = trim(substr($attributes,$pos+1));

}elseif ($pos>0){
# No usable quote found: the value is unquoted.
if (strpos($attributes,"=")>0){
# Candidate value is the text up to the next "=".
$pos = strpos($attributes,"=");
$val = substr($attributes,0,$pos);
}else{
# No further "=": the candidate is everything that is left.
$val = $attributes;
}

$pos2 = strrpos($val," ");
if($pos2>0){
# Candidate contains a space: the value is the part before the last space,
# the remainder is kept for the next pass.
$len = strlen($val);
$val = substr($val,0,$pos2);
# NOTE(review): when no "=" was found above, $pos still holds the offset
# from the top of the loop - confirm the resulting start index is intended.
$attributes = trim(substr($attributes,($pos-$len)+$pos2));
}else{
# No space: the whole candidate is the value and is consumed.
$len = strlen($val);
$attributes = trim(substr($attributes,$len));
}

# Words before the final name are attributes without a value.
while (strpos($attribute," ")>0){
$pos1 = strpos($attribute," ");
$attr1 = substr($attribute,0,$pos1);
$output[$attr1] = null;
$attribute = trim(substr($attribute,$pos1+1));
}

$output[$attribute] = $val;

}else{
# NOTE(review): appears unreachable - the loop condition guarantees $pos>0
# at the top of the pass and only the quoted branch reassigns it.
while (strpos($attribute," ")>0){
$pos1 = strpos($attribute," ");
$attr1 = substr($attribute,0,$pos1);
$output[$attr1] = null;
$attribute = trim(substr($attribute,$pos1+1));
}
$output[$attribute] = $attributes;
}
}

# Text left after the last name=value pair is stored as one valueless key.
if(strlen(trim($attributes))>0){
# Operates on $attribute as left by the last loop pass.
while (strpos($attribute," ")>0){
$pos1 = strpos($attribute," ");
$attr1 = substr($attribute,0,$pos1);
$output[$attr1] = null;
$attribute = trim(substr($attribute,$pos1+1));
}

# The whole remaining string is used as the key, spaces included.
$output[$attributes] = null;

}

# Nothing is returned when no attribute was recorded.
if (isset($output)) return($output);
}


/**
 * Renders an opening tag as coloured, HTML-escaped markup.
 *
 * @param string      $element    Element name.
 * @param array|false $attributes Map of attribute name => value; a null
 *                                value means the attribute has no value and
 *                                only its name is shown. Anything that is
 *                                not an array is ignored.
 * @param int         $t          Unused; kept for interface compatibility.
 * @return string Markup for "<element attr="value" ...>".
 */
function html_start($element,$attributes=FALSE, $t=0){

    $tagasi = '<font color="#0000ff">'.htmlentities('<'.$element);

    if(is_array($attributes)){
        # foreach replaces list()/each(): each() no longer exists in PHP 8.
        foreach($attributes as $k => $v){
            # Names come from the parsed document, so escape them too.
            $tagasi .= '<font color="#ff0000"> '.htmlentities(strtolower((string) $k)).'</font>';
            # Strict test so that empty values (alt="") are still shown;
            # false is skipped because old substr() yields it for "no value".
            if($v !== null and $v !== false){
                $tagasi .= '=';
                $tagasi .= '<font color="#FF00FF">"'.htmlentities((string) $v).'"</font>';
            }
        }
    }
    $tagasi .= htmlentities('>').'</font>';
    return $tagasi;
}


/**
 * Renders a closing tag as coloured, HTML-escaped markup.
 *
 * @param string $element Element name without the leading slash.
 * @param int    $t       Unused.
 * @return string Markup for "</element>".
 */
function html_end($element, $t=0)
{
    $opening = htmlentities('</');
    $name    = htmlentities($element);
    $closing = htmlentities('>');

    return '<font color="#0000ff">' . $opening . $name . $closing . '</font>';
}


/**
 * Renders a run of plain text: escaped, with line breaks turned into <br />.
 *
 * @param string $data Raw text.
 * @param int    $t    Unused.
 * @return string Black <font> wrapper around the escaped text.
 */
function html_text($data, $t=0)
{
    $escaped = htmlentities($data);
    $withBreaks = nl2br($escaped);

    return '<font color="#000000">' . $withBreaks . '</font>';
}


/**
 * Renders an HTML comment: delimiters restored, escaped, line breaks kept.
 *
 * @param string $data Comment body without "<!--" and "-->".
 * @param int    $t    Unused.
 * @return string Green <font> wrapper around the escaped comment.
 */
function html_comment($data, $t=0)
{
    $source = '<!--' . $data . '-->';
    $rendered = nl2br(htmlentities($source));

    return '<font color="#008000">' . $rendered . '</font>';
}


/**
 * Renders a declaration such as a doctype: delimiters restored and escaped.
 *
 * @param string $data Declaration body without "<!" and ">".
 * @param int    $t    Unused.
 * @return string Grey <font> wrapper around the escaped declaration.
 */
function html_doctype($data, $t=0)
{
    $source = '<!' . $data . '>';
    $rendered = htmlentities($source);

    return '<font color="#999999">' . $rendered . '</font>';
}


}
?>


and here is the output file:

<?php
#
# indrek päri
# [email protected]
#
# Example driver: renders the HTML source of one page with html_parser.

# load the parser class
include_once 'html_parser.php';

# address of the HTML document to parse
$file = 'http://www.sopsa.pt';

# build the parser ( source & line numbers: 1 = show, 0 = don't show )
$parser = new html_parser($file, 1);

# output the generated markup
print $parser->content;
# echo '&xmlhref='.$variavel

?>

I also need the file to parse to be decided by the user in Flash's first interface, so in Flash I need to change the variable $file and send it to PHP.
Thank you
Ricardo