脚本藏身之处不过有四:
1、<script>标签、<link>标签、<style>标签、<iframe>标签
2、on开头的标签属性
3、javascript(vbscript)伪协议
4、css的expression
下面是他们的字符串规则:
1、<(script|link|style|iframe)(.|\n)*<\/\1>\s*
2、\s*on[a-z]+\s*=\s*("[^"]+"|'[^']+'|[^\s]+)\s*(?=>)
3、\s*(href|src)\s*=\s*("\s*(javascript|vbscript):[^"]+"|'\s*(javascript|vbscript):[^']+'|(javascript|vbscript):[^\s]+)\s*(?=>)
4、expression\((.|\n)*\);?
了解他们的规则后,抓虫行动就水到渠成。
运行可以看到效果:
<textarea id="bug" cols="80" rows="16">
<button id="kick">抓虫1</button>
<script>
function kickBug(str) {
return str.replace(/<(script|link|style|iframe)(.|\n)*\/\1>\s*/ig,"");
}
</script>
<iframe></iframe>
<link href='test.css'></link>
<style>
a {
height:expression(alert('hei'));
}
</style>
</textarea>
<button id="kick">抓虫1</button>
<script>
/**
 * Removes <script>, <link>, <style> and <iframe> elements (opening tag,
 * content and matching closing tag) from an HTML string.
 *
 * @param {string} str - HTML source text to filter.
 * @returns {string} The text with every matched element, and any whitespace
 *   directly following it, removed.
 */
function kickBug(str) {
  // [\s\S]*? spans newlines and is lazy, so each element is removed on its
  // own and text between two elements survives. \1 forces the closing tag
  // name to equal the opening one (case-insensitively, because of /i).
  // NOTE(review): an element without a closing tag (e.g. a bare <link ...>)
  // is not matched by this pattern.
  return str.replace(/<(script|link|style|iframe)[\s\S]*?<\/\1\s*>\s*/ig, "");
}
// Compatibility shim: define an `innerText` accessor backed by `textContent`
// only when the browser has no native one. Feature detection replaces the
// former user-agent test for "msie", which overrode the native property in
// every non-IE browser; Object.defineProperty replaces the deprecated
// __defineGetter__/__defineSetter__ pair.
if (!("innerText" in HTMLElement.prototype)) {
  Object.defineProperty(HTMLElement.prototype, "innerText", {
    configurable: true,
    enumerable: true,
    get: function () {
      return this.textContent;
    },
    set: function (text) {
      this.textContent = text;
    }
  });
}
// Clicking the button with id "kick" runs the content of the textarea with
// id "bug" through kickBug() (defined earlier in this script) and writes the
// filtered text back.
document.getElementById("kick").onclick = function () {
  var bug = document.getElementById("bug");
  // `value` is the textarea's current content; innerText/textContent only
  // address its initial child text and stop tracking it once the user edits.
  bug.value = kickBug(bug.value);
};
</script>
运行可以看到效果:
<textarea id="bug" cols="80" rows="5">
<a onclick="test();
test1()" onblur=
"test3()">test</a>
</textarea>
<button id="kick">抓虫2</button>
<script>
/**
 * Removes inline event-handler attributes (attribute names starting with
 * "on", such as onclick or onblur) from the opening tags in an HTML string.
 *
 * @param {string} str - HTML source text to filter.
 * @returns {string} The text with the on* attributes stripped from each tag.
 */
function kickBug(str) {
  // Step 1: find every opening tag that contains an on*= attribute. The
  // match runs up to, but not including, the closing ">".
  return str.replace(/<[a-z][^>]*\s*on[a-z]+\s*=[^>]+/ig, function (tag) {
    // Step 2: inside that tag, delete each on*= attribute whether its value
    // is double-quoted, single-quoted or unquoted.
    // NOTE(review): the leading \s* is optional, so an attribute whose name
    // merely ends in "on" + letters (e.g. condition=) is also affected.
    return tag.replace(/\s*on[a-z]+\s*=\s*("[^"]+"|'[^']+'|[^\s]+)\s*/ig, "");
  });
}
// Compatibility shim: define an `innerText` accessor backed by `textContent`
// only when the browser has no native one. Feature detection replaces the
// former user-agent test for "msie", which overrode the native property in
// every non-IE browser; Object.defineProperty replaces the deprecated
// __defineGetter__/__defineSetter__ pair.
if (!("innerText" in HTMLElement.prototype)) {
  Object.defineProperty(HTMLElement.prototype, "innerText", {
    configurable: true,
    enumerable: true,
    get: function () {
      return this.textContent;
    },
    set: function (text) {
      this.textContent = text;
    }
  });
}
// Clicking the button with id "kick" runs the content of the textarea with
// id "bug" through kickBug() (defined earlier in this script) and writes the
// filtered text back.
document.getElementById("kick").onclick = function () {
  var bug = document.getElementById("bug");
  // `value` is the textarea's current content; innerText/textContent only
  // address its initial child text and stop tracking it once the user edits.
  bug.value = kickBug(bug.value);
};
</script>
运行可以看到效果:
<textarea id="bug" cols="80" rows="5">
<a onclick="test();" href="
jAvascript:alert('a')" href="jAvascript:"
href="vbscript:alert()"
>test</a>
</textarea>
<button id="kick">抓虫3</button>
<script>
/**
 * Removes href/src attributes whose value uses the javascript: or vbscript:
 * pseudo-protocol from the opening tags in an HTML string.
 *
 * @param {string} str - HTML source text to filter.
 * @returns {string} The text with the offending href/src attributes removed.
 *   Decimal character references for ASCII letters inside a processed tag
 *   are returned decoded.
 */
function kickBug(str) {
  // Step 1: find every opening tag that contains an href= or src= attribute
  // (matched up to, but not including, the closing ">").
  return str.replace(/<[a-z][^>]*\s*(href|src)\s*=[^>]+/ig, function (tag) {
    // Step 2: decode decimal character references for A-Z (65-90) and a-z
    // (97-122) so that "&#106;avascript:" cannot hide the protocol name.
    // NOTE(review): hexadecimal references and non-letter characters such
    // as ":" are not decoded here.
    var decoded = tag.replace(/&#(6[5-9]|[78][0-9]|9[0789]|1[01][0-9]|12[012]);?/g, function (entity, code) {
      return String.fromCharCode(Number(code));
    });
    // Step 3: drop each href/src attribute whose double-quoted, single-quoted
    // or unquoted value starts with javascript: or vbscript:.
    return decoded.replace(/\s*(href|src)\s*=\s*("\s*(javascript|vbscript):[^"]+"|'\s*(javascript|vbscript):[^']+'|(javascript|vbscript):[^\s]+)/ig, "");
  });
}
// Compatibility shim: define an `innerText` accessor backed by `textContent`
// only when the browser has no native one. Feature detection replaces the
// former user-agent test for "msie", which overrode the native property in
// every non-IE browser; Object.defineProperty replaces the deprecated
// __defineGetter__/__defineSetter__ pair.
if (!("innerText" in HTMLElement.prototype)) {
  Object.defineProperty(HTMLElement.prototype, "innerText", {
    configurable: true,
    enumerable: true,
    get: function () {
      return this.textContent;
    },
    set: function (text) {
      this.textContent = text;
    }
  });
}
// Clicking the button with id "kick" runs the content of the textarea with
// id "bug" through kickBug() (defined earlier in this script) and writes the
// filtered text back.
document.getElementById("kick").onclick = function () {
  var bug = document.getElementById("bug");
  // `value` is the textarea's current content; innerText/textContent only
  // address its initial child text and stop tracking it once the user edits.
  bug.value = kickBug(bug.value);
};
</script>
运行可以看到效果:
<textarea id="bug" cols="80" rows="5">
expression()
<a style="color:expression(
'red'
)">test</a>
</textarea>
<button id="kick">抓虫4</button>
<script>
/**
 * Removes style attributes that contain a CSS expression(...) from the
 * opening tags in an HTML string.
 *
 * @param {string} str - HTML source text to filter.
 * @returns {string} The text with the offending style attributes removed.
 *   Decimal character references for ASCII letters inside a processed tag
 *   are returned decoded.
 */
function kickBug(str) {
  // Step 1: find every opening tag that contains a style= attribute
  // (matched up to, but not including, the closing ">").
  return str.replace(/<[a-z][^>]*\s*style\s*=[^>]+/ig, function (tag) {
    // Step 2: decode decimal character references for A-Z (65-90) and a-z
    // (97-122) so that "&#101;xpression" cannot hide the keyword.
    var decoded = tag.replace(/&#(6[5-9]|[78][0-9]|9[0789]|1[01][0-9]|12[012]);?/g, function (entity, code) {
      return String.fromCharCode(Number(code));
    });
    // Step 3: drop the style attribute only when its value contains
    // "expression". The keyword is written out in every alternative: a
    // back-reference to a group in a non-participating alternative matches
    // the empty string in JavaScript, which would make the single-quoted and
    // unquoted branches match harmless style values as well.
    // NOTE(review): obfuscations such as CSS comments or backslash escapes
    // inside the keyword are not detected.
    return decoded.replace(/\s*style\s*=\s*("[^"]+expression[^"]+"|'[^']+expression[^']+'|[^\s]+expression[^\s]+)\s*/ig, "");
  });
}
// Compatibility shim: define an `innerText` accessor backed by `textContent`
// only when the browser has no native one. Feature detection replaces the
// former user-agent test for "msie", which overrode the native property in
// every non-IE browser; Object.defineProperty replaces the deprecated
// __defineGetter__/__defineSetter__ pair.
if (!("innerText" in HTMLElement.prototype)) {
  Object.defineProperty(HTMLElement.prototype, "innerText", {
    configurable: true,
    enumerable: true,
    get: function () {
      return this.textContent;
    },
    set: function (text) {
      this.textContent = text;
    }
  });
}
// Clicking the button with id "kick" runs the content of the textarea with
// id "bug" through kickBug() (defined earlier in this script) and writes the
// filtered text back.
document.getElementById("kick").onclick = function () {
  var bug = document.getElementById("bug");
  // `value` is the textarea's current content; innerText/textContent only
  // address its initial child text and stop tracking it once the user edits.
  bug.value = kickBug(bug.value);
};
</script>
这样调用就可以
k1(k2(k3(k4(str))))
这样就是单纯地过滤脚本而已,所谓过滤“危险脚本”应该是能够判断哪些属于“危险”脚本,不危险的就不过滤才对……那可就难办了,相当于防火墙了