<?php

//
// Spam Rater 1.3
//
// By:    Legolas
// email: legolas [AT] legolasweb [DOT] nl
// web:   http://www.legolasweb.nl/
//
//
// Use:
// int spam_check(mixed $title, mixed $message[, string $blacklist]);
// returns a value between 0 and 100, which represents the spam-rating
// 100% - Spam
// 70%  - Good chance it's spam
// 0%   - Probably not spam

// Rate how likely a posted message is link spam.
//
// The message is lower-cased, split into plain text ("stuff"), link targets
// and link texts ("titles") around <a href="...">...</a> markup, and then run
// through a series of heuristic tests that each add to (or subtract from)
// the rating.
//
// $title     Title of the post; compared (after the same normalisation as the
//            message) against the link texts.
// $message   Body of the post, expected to be a string that may contain HTML.
// $blacklist Optional path to a blacklist file: comma-separated,
//            base64-encoded words. When given and readable it enables the
//            black list tests, and the file is rewritten with newly learned
//            words whenever the raw rating reaches 100.
//
// Returns the rating rounded and clamped to 0..100 (as returned by round()).
function spam_check($title, $message, $blacklist = null) {
	$rating = 0;

	// Normalise the message: single quotes become double quotes so that
	// href='...' is recognised, everything is lower-cased, slashes stripped.
	$message = stripslashes(strtolower(str_replace("'", "\"", $message)));

	// The link texts below come from the normalised message, so the title
	// has to go through the same normalisation or it could never match.
	$title_norm = null;
	if (is_scalar($title)) {
		$title_norm = stripslashes(strtolower(str_replace("'", "\"", (string) $title)));
	}

	// Splitting on the three link delimiters yields a repeating sequence of
	// plain text, link target, link text, plain text, ...
	$test_parts = preg_split("/(<a href=\"|\">|<\/a>)/", $message);
	$stuff = array();
	$links = array();
	$titles = array();
	foreach ($test_parts as $index => $piece) {
		switch ($index % 3) {
			case 0:
				$stuff[] = $piece;
				break;
			case 1:
				$links[] = $piece;
				break;
			default:
				$titles[] = $piece;
		}
	}

	// Load the blacklist; it is only switched on when the file could
	// actually be read.
	$bl_switch = false;
	$bl_arr = array();
	if ($blacklist != null && file_exists($blacklist)) {
		$bl_str = file_get_contents($blacklist);
		if ($bl_str !== false) {
			$bl_arr = array_map('base64_decode', explode(",", $bl_str));
			$bl_switch = true;
		}
	}

	// Every non-empty whitespace-separated word of the plain text and of the
	// link texts (tags stripped), plain text first.
	$stuffwords = array();
	foreach (array_merge($stuff, $titles) as $chunk) {
		$words = preg_split("/[ \n\r\t]+/", strip_tags($chunk), -1, PREG_SPLIT_NO_EMPTY);
		foreach ($words as $word) {
			$stuffwords[] = $word;
		}
	}

	// Links on text (-100)
	// Ratio of link-target characters to plain-text characters, capped at
	// the test worth. A message without any plain text counts as fully
	// link-dominated when it has links (instead of dividing by zero), and
	// as link-free otherwise.
	$test_worth = 100;
	$stuffchars = array_sum(array_map('strlen', $stuff));
	$linkchars = array_sum(array_map('strlen', $links));
	if ($stuffchars > 0) {
		$score = min($test_worth, ($linkchars / $stuffchars) * $test_worth);
	}
	else {
		$score = ($linkchars > 0) ? $test_worth : 0;
	}
	$rating += $score - $test_worth;

	// Links test (50)
	// Reduces to $test_worth / count($unique_links).
	$test_worth = 50;
	$unique_links = array_values(array_unique($links));
	if (count($unique_links) > 0) {
		$rating += (count($links) / count($unique_links)) * ($test_worth / count($links));
	}

	// Link base test (50)
	// The base is the first "/"-separated segment of the link once the
	// scheme and any query string have been removed.
	$test_worth = 50;
	$unique_link_bases = array();
	$unique_link_sets = array();
	foreach ($unique_links as $link) {
		$link = preg_replace('#^https?://#', '', $link);
		$without_query = explode("?", $link, 2);
		$segments = explode("/", $without_query[0]);
		$unique_link_sets[] = $segments;
		if (!in_array($segments[0], $unique_link_bases, true)) {
			$unique_link_bases[] = $segments[0];
		}
	}
	if (count($unique_link_bases) > 0) {
		$rating += (count($unique_links) / count($unique_link_bases)) * ($test_worth / count($unique_links));
	}

	// Link parts test (50)
	// Collects the distinct "-"/"_"-separated words of every path segment
	// (the base segment at index 0 is skipped).
	$test_worth = 50;
	$unique_link_parts = array();
	$total_part_count = 0;
	foreach ($unique_link_sets as $segments) {
		$last = count($segments) - 1;
		for ($j = 1; $j <= $last; $j++) {
			$segment = $segments[$j];
			if ($j == $last) {
				// Cut the last segment at its final dot (drops the extension).
				// NOTE(review): a last segment without any dot is discarded
				// entirely, exactly as before; confirm that this is intended.
				$dot = strrpos($segment, ".");
				$segment = ($dot === false) ? "" : substr($segment, 0, $dot);
			}
			foreach (preg_split("/(-|_)/", $segment) as $part) {
				if ($part !== "" && !in_array($part, $unique_link_parts, true)) {
					$unique_link_parts[] = $part;
				}
				// Empty parts are counted too, as in the original scoring.
				$total_part_count++;
			}
		}
	}
	if (count($unique_link_parts) > 0) {
		$rating += ($total_part_count / count($unique_link_parts)) * ($test_worth / $total_part_count);
	}

	// Black list test (50)
	$test_worth = 50;
	$test = 0;
	if ($bl_switch) {
		foreach ($stuffwords as $word) {
			if (in_array($word, $bl_arr, true)) {
				$test++;
			}
		}
		if ($test > 0) {
			$rating += (count($stuffwords) / $test) * ($test_worth / count($stuffwords));
		}
	}

	// Black list link parts test (50)
	$test_worth = 50;
	$test = 0;
	if ($bl_switch) {
		foreach ($unique_link_parts as $part) {
			if (in_array($part, $bl_arr, true)) {
				$test++;
			}
		}
		if ($test > 0) {
			$rating += (count($unique_link_parts) / $test) * ($test_worth / count($unique_link_parts));
		}
	}

	// Link title test (50)
	// Counts link texts that merely repeat a link target or the post title.
	$test_worth = 50;
	$test = 0;
	foreach ($titles as $link_text) {
		if (in_array($link_text, $links, true) || $link_text === $title_norm) {
			$test++;
		}
	}
	if ($test > 0) {
		$rating += (count($titles) / $test) * ($test_worth / count($titles));
	}

	// Header test (10)
	// The whole message is a single <h1>...</h1> element.
	$test_worth = 10;
	$test = 0;
	if (substr($message, 0, 4) == "<h1>" && substr($message, -5) == "</h1>") {
		$test = 1;
	}
	$rating += $test * $test_worth;

	// Link in stuff test (50)
	// Counts link targets that also appear verbatim in the plain text.
	// strpos() returns 0 for a match at the very start, so the result must
	// be compared against false; empty targets are skipped because they
	// would match everything.
	$test_worth = 50;
	$test = 0;
	foreach ($stuff as $chunk) {
		foreach ($unique_links as $link) {
			if ($link !== "" && strpos($chunk, $link) !== false) {
				$test++;
			}
		}
	}
	if ($test > 0) {
		$rating += (count($stuff) / $test) * ($test_worth / count($stuff));
	}

	// Black list updating (15% of the text)
	// When the unclamped rating reaches 100, every word or link part that
	// makes up more than 15% of the not-yet-blacklisted candidates is added
	// to the blacklist and the file is rewritten.
	$minimal_occurence = 15;
	$minimal_rating = 100;
	if ($bl_switch && $rating >= $minimal_rating) {
		$propose = array();
		foreach (array_merge($stuffwords, $unique_link_parts) as $candidate) {
			if ($candidate !== "" && !in_array($candidate, $bl_arr, true)) {
				$propose[$candidate] = isset($propose[$candidate]) ? $propose[$candidate] + 1 : 1;
			}
		}

		$total_propose = array_sum($propose);
		foreach ($propose as $key => $value) {
			if ($value > $total_propose * ($minimal_occurence / 100)) {
				// Numeric-looking words become integer array keys; store
				// them back as strings.
				$bl_arr[] = (string) $key;
			}
		}

		$encoded = array();
		foreach ($bl_arr as $entry) {
			if (!empty($entry)) {
				$encoded[] = base64_encode($entry);
			}
		}
		// Best effort: a failed write leaves the old blacklist in place and
		// does not affect the returned rating. LOCK_EX keeps concurrent
		// requests from interleaving their writes.
		file_put_contents($blacklist, implode(",", $encoded), LOCK_EX);
	}

	return round(max(0, min(100, $rating)));
}

// Demo entry point: when the request carries a non-empty title and message,
// print the spam rating. Request parameters can arrive as arrays
// (e.g. title[]=x), which spam_check() cannot process, so both must be
// plain strings.
if (
	!empty($_REQUEST["title"]) && is_string($_REQUEST["title"])
	&& !empty($_REQUEST["message"]) && is_string($_REQUEST["message"])
) {
	echo("Rated: " . spam_check($_REQUEST["title"], $_REQUEST["message"]) . "%");
}

?>