Scripts
Spam Rater
Dit script is bedoeld om de irritante poker-spambots tegen te gaan die ook hier de boel onveilig maken. De nieuwste versie staat altijd hier: http://www.legolasweb.nl/content/view/44/35/ . Het script kijkt naar de hoeveelheid a href-links, de gelijkheid tussen links, header 1's, de verhouding tekst / links, etc. Nu dus ook met een blacklist-optie: deze houdt bij welke woorden meer dan 15% van de tekst van 100% rated posts beslaan.
spam-rater
<?php
//
// Spam Rater 1.3
//
// By: Legolas
// email: legolas [AT] legolasweb [DOT] nl
// web: http://www.legolasweb.nl/
//
//
// Use:
// int spam_check(mixed $title, mixed $message[, string $blacklist]);
// returns a value between 0 and 100, which represents the spam-rating
// 100% - Spam
// 70% - Good-chance it's spam
// 0% - Probably not spam
/**
 * Rate how likely a posted message is link spam.
 *
 * The message is split around `<a href="...">...</a>` markup into plain text
 * fragments, link targets and anchor texts. Several heuristics then add to or
 * subtract from a running score, which is finally clamped to 0..100.
 *
 * When a blacklist file is given and exists, its words (comma-separated,
 * base64-encoded) take part in two extra tests. If the unclamped score reaches
 * 100, every word or link part that makes up more than 15% of the not yet
 * blacklisted candidates is appended and the file is rewritten.
 *
 * @param mixed       $title     Title of the post; compared with the anchor texts.
 * @param mixed       $message   Body of the post (HTML); treated as a string.
 * @param string|null $blacklist Optional path to the blacklist file.
 *
 * @return float Whole-number spam rating between 0 and 100 (result of round()).
 */
function spam_check($title, $message, $blacklist = null) {
    $rating = 0;

    // Normalise the body: single quotes become double quotes so href='...' is
    // recognised too, everything is lowercased and backslashes are stripped.
    $message = stripslashes(strtolower(str_replace("'", "\"", $message)));

    // The anchor texts come out of the normalised message, so the title has to
    // go through the same normalisation or a mixed-case title can never match.
    $normalized_title = null;
    if (is_scalar($title) || $title === null) {
        $normalized_title = stripslashes(strtolower(str_replace("'", "\"", (string) $title)));
    }

    // Splitting on the three anchor delimiters yields a repeating sequence:
    // text, link target, anchor text, text, link target, anchor text, ...
    $test_parts = preg_split("/(<a href=\"|\">|<\/a>)/", $message);
    if ($test_parts === false) {
        $test_parts = array($message);
    }
    $stuff = array();
    $links = array();
    $titles = array();
    foreach ($test_parts as $index => $piece) {
        switch ($index % 3) {
            case 0:
                $stuff[] = $piece;
                break;
            case 1:
                $links[] = $piece;
                break;
            default:
                $titles[] = $piece;
        }
    }

    // Load the blacklist (comma-separated, base64-encoded words) if available.
    $bl_switch = false;
    $bl_arr = array();
    if ($blacklist !== null && $blacklist !== '' && file_exists($blacklist)) {
        $bl_str = file_get_contents($blacklist);
        if ($bl_str !== false) {
            foreach (explode(",", $bl_str) as $encoded_word) {
                $bl_arr[] = base64_decode($encoded_word);
            }
            $bl_switch = true;
        }
    }

    // Collect all words: first from the plain text, then from the anchor texts.
    $stuffwords = array();
    foreach (array_merge($stuff, $titles) as $fragment) {
        foreach (preg_split("/( |\n|\r|\t)/", strip_tags($fragment)) as $word) {
            if ($word !== '') {
                $stuffwords[] = $word;
            }
        }
    }

    // Links on text (-100): the less link text relative to plain text, the
    // more is subtracted from the rating.
    $test_worth = 100;
    $stuffchars = 0;
    foreach ($stuff as $fragment) {
        $stuffchars += strlen($fragment);
    }
    $linkchars = 0;
    foreach ($links as $link) {
        $linkchars += strlen($link);
    }
    if ($stuffchars > 0) {
        $score = min($test_worth, ($linkchars / $stuffchars) * $test_worth);
    }
    else {
        // No plain text at all: a links-only message gets the maximum ratio,
        // an entirely empty one gets none. This avoids dividing by zero.
        $score = $linkchars > 0 ? $test_worth : 0;
    }
    $rating += $score - $test_worth;

    // NOTE(review): every "(n / x) * (worth / n)" expression below reduces to
    // worth / x, so a larger x contributes less. The formulas are kept exactly
    // as they were; confirm this weighting is intended.

    // Links test (50)
    $test_worth = 50;
    $unique_links = array_values(array_unique($links));
    if (count($unique_links) > 0) {
        $rating += (count($links) / count($unique_links)) * ($test_worth / count($links));
    }

    // Link base test (50): how many distinct hosts do the unique links use?
    $test_worth = 50;
    $unique_link_bases = array();
    $unique_link_sets = array();
    foreach ($unique_links as $link) {
        // Drop the scheme (http and https) so the first segment is the host.
        $link = preg_replace('#^https?://#', '', $link);
        // Only the part before the first "?" is split into path segments.
        $link_and_query = explode("?", $link);
        $segments = explode("/", $link_and_query[0]);
        $unique_link_sets[] = $segments;
        if (!in_array($segments[0], $unique_link_bases, true)) {
            $unique_link_bases[] = $segments[0];
        }
    }
    if (count($unique_link_bases) > 0) {
        $rating += (count($unique_links) / count($unique_link_bases)) * ($test_worth / count($unique_links));
    }

    // Link parts test (50): split the path segments (host excluded) on "-" and
    // "_" and count the distinct pieces.
    $test_worth = 50;
    $unique_link_parts = array();
    $total_part_count = 0;
    foreach ($unique_link_sets as $segments) {
        $last_index = count($segments) - 1;
        for ($j = 1; $j <= $last_index; $j++) {
            $segment = $segments[$j];
            if ($j == $last_index) {
                // Cut off everything after the final dot (the file extension).
                // NOTE(review): a last segment without any dot becomes empty
                // here, exactly as before - confirm that is intended.
                $dot_pieces = explode(".", $segment);
                array_pop($dot_pieces);
                $segment = implode(".", $dot_pieces);
            }
            foreach (preg_split("/(-|_)/", $segment) as $part) {
                if ($part !== '' && !in_array($part, $unique_link_parts, true)) {
                    $unique_link_parts[] = $part;
                }
                $total_part_count++;
            }
        }
    }
    if (count($unique_link_parts) > 0) {
        $rating += ($total_part_count / count($unique_link_parts)) * ($test_worth / $total_part_count);
    }

    // Black list test (50)
    $test_worth = 50;
    if ($bl_switch) {
        $test = 0;
        foreach ($stuffwords as $word) {
            if (in_array($word, $bl_arr, true)) {
                $test++;
            }
        }
        if ($test > 0) {
            $rating += (count($stuffwords) / $test) * ($test_worth / count($stuffwords));
        }
    }

    // Black list link parts test (50)
    $test_worth = 50;
    if ($bl_switch) {
        $test = 0;
        foreach ($unique_link_parts as $part) {
            if (in_array($part, $bl_arr, true)) {
                $test++;
            }
        }
        if ($test > 0) {
            $rating += (count($unique_link_parts) / $test) * ($test_worth / count($unique_link_parts));
        }
    }

    // Link title test (50): anchor text that merely repeats a link target or
    // the title of the post.
    $test_worth = 50;
    $test = 0;
    foreach ($titles as $anchor_text) {
        if (in_array($anchor_text, $links, true)) {
            $test++;
        }
        elseif ($normalized_title !== null && $anchor_text === $normalized_title) {
            $test++;
        }
    }
    if ($test > 0) {
        $rating += (count($titles) / $test) * ($test_worth / count($titles));
    }

    // Header test (10): the whole message is wrapped in a single <h1>.
    $test_worth = 10;
    if (substr($message, 0, 4) == "<h1>" && substr($message, -5) == "</h1>") {
        $rating += $test_worth;
    }

    // Link in stuff test (50): a link target that is also written out in the
    // plain text.
    $test_worth = 50;
    $test = 0;
    foreach ($stuff as $fragment) {
        foreach ($unique_links as $link) {
            // Compare against false explicitly: a hit at offset 0 is a hit.
            if ($link !== '' && strpos($fragment, $link) !== false) {
                $test++;
            }
        }
    }
    if ($test > 0) {
        $rating += (count($stuff) / $test) * ($test_worth / count($stuff));
    }

    // Black list updating (15% of the text)
    $minimal_occurence = 15;
    $minimal_rating = 100;
    if ($bl_switch && $rating >= $minimal_rating) {
        // Count every candidate (word or link part) that is not blacklisted yet.
        $propose = array();
        foreach (array_merge($stuffwords, $unique_link_parts) as $candidate) {
            if ($candidate === '' || in_array($candidate, $bl_arr, true)) {
                continue;
            }
            if (isset($propose[$candidate])) {
                $propose[$candidate]++;
            }
            else {
                $propose[$candidate] = 1;
            }
        }
        $threshold = array_sum($propose) * ($minimal_occurence / 100);
        foreach ($propose as $candidate => $occurrences) {
            if ($occurrences > $threshold) {
                // Numeric-looking array keys come back as integers.
                $bl_arr[] = (string) $candidate;
            }
        }

        // Drop empty entries, then store the list in its on-disk format.
        $out = array();
        foreach ($bl_arr as $word) {
            if (is_string($word) && $word !== '') {
                $out[] = $word;
            }
        }
        $bl_arr = $out;
        $bl_str = implode(",", array_map('base64_encode', $bl_arr));
        // Best effort: a failed write emits a warning but must not abort the
        // rating of the current message.
        file_put_contents($blacklist, $bl_str);
    }

    if ($rating < 0) {
        $rating = 0;
    }
    if ($rating > 100) {
        $rating = 100;
    }
    return round($rating);
}
// Demo entry point: when the request carries both a non-empty "title" and a
// non-empty "message", rate the message (without a blacklist) and print it.
if (!(empty($_REQUEST["title"]) || empty($_REQUEST["message"]))) {
    echo "Rated: ", spam_check($_REQUEST["title"], $_REQUEST["message"]), "%";
}
?>
Reacties
0