The Legendary Next Update (365)

243 Name:   2005-10-22 10:54 ID:Heaven [Del]

If you want to have a look at what the code actually does to dig out flaws, here is the current version:

sub sanitize_html($%)
{
my ($html,%tags)=@_;
my (@stack,$clean);
my $entity_re=qr/&(?!\#[0-9]+;|\#x[0-9a-fA-F]+;|amp;)/;

while($html=~/(?:([^<]+)|<([^<>]*)>?)/g)
{
my ($text,$tag)=($1,$2);

if($text)
{
$text=~s/$entity_re/&amp;/g;
$text=~s/>/&gt;/g;
$clean.=$text;
}
else
{
if($tag=~m!^\s*(/?)\s*([a-z0-9_:\-\.]+)(?:\s+(.*?)|)\s*(/?)\s*$!si)
{
my ($closing,$name,$args,$implicit)=($1,lc($2),$3,$4);

if($tags{$name})
{
if($closing)
{
if(grep { $_ eq $name } @stack)
{
my $entry;

do {
$entry=pop @stack;
$clean.="</$entry>";
} until $entry eq $name;
}
}
else
{
my %args;

$args=~s/\s/ /sg;

while($args=~/([a-z0-9_:\-\.]+)(?:\s*=\s*(?:'([^']*?)'|"([^"]*?)"|['"]?([^'" ]*))|)/gi)
{
my ($arg,$value)=(lc($1),defined($2)?$2:defined($3)?$3:$4);
$value=$arg unless defined($value);

my $type=$tags{$name}{args}{$arg};

if($type)
{
my $passes=1;

if($type=~/url/i) { $passes=0 unless $value=~/(?:^$protocol_re:|^[^:]+$)/ }
if($type=~/number/i) { $passes=0 unless $value=~/^[0-9]+$/ }

if($passes)
{
$value=~s/$entity_re/&amp;/g;

if($value=~/"/) { $value="'$value'" }
else { $value="\"$value\"" }

$args{$arg}=$value;
}
}
}

my $cleanargs=join " ",map { "$_=$args{$_}" } keys %args;

$implicit="/" if($tags{$name}{empty});

push @stack,$name unless $implicit;

$clean.="<$name";
$clean.=" $cleanargs" if $cleanargs;
$clean.=" $implicit" if $implicit;
$clean.=">";
}
}
}
}
}

my $entry;
while($entry=pop @stack) { $clean.="</$entry>" }

return $clean;
}
This thread has been closed. You cannot post in this thread any longer.