Написал скрипт-парсер на PHP. Парсер работает на cURL с многопоточностью. Скриптом доволен, но парсит он как-то долго: я думал, что с многопоточностью будет намного быстрее.
Скрипт парсера:
PHP:
скопировать код в буфер обмена
скопировать код в буфер обмена
- <?PHP
- $categoryid = $_GET['cat']; // Category id read verbatim from the query string; no validation is visible in this listing.
- } // NOTE(review): no matching opening brace in the visible listing - presumably a guarding condition above was lost.
/**
 * Count the rows of a table, optionally narrowed by an extra SQL fragment.
 *
 * Executes "SELECT COUNT(*) FROM $table$suffix" on the global $mysqli
 * connection. Both arguments are spliced into the statement verbatim, so
 * they must never carry untrusted input.
 *
 * @param string $table  Table name; callers may append a WHERE clause to it.
 * @param string $suffix Optional SQL fragment added after the table name,
 *                       separated by a single space.
 *
 * @return mixed First column of the first result row (the count as delivered
 *               by the driver).
 *
 * @throws RuntimeException When the query fails.
 */
function get_row_count($table, $suffix = "") {
    global $mysqli;

    if ($suffix) {
        $suffix = " $suffix";
    }

    $result = $mysqli->query("SELECT COUNT(*) FROM $table$suffix");
    if (!$result) {
        // Without this check a failed query surfaces only as an opaque
        // "Call to a member function fetch_row() on bool" fatal error.
        throw new RuntimeException("get_row_count query failed: " . $mysqli->error);
    }

    $row = $result->fetch_row();
    // Release the result set right away; only the single value is needed.
    $result->free();

    return $row[0];
}
/**
 * Escape a value and wrap it in single quotes for use in an SQL statement.
 *
 * Escaping is delegated to real_escape_string() on the global $mysqli
 * connection. The value is always quoted and always returned; in the
 * original listing a stray closing brace ended the function before its
 * `return`, so callers received nothing.
 *
 * @param mixed $value Value to embed in SQL; it is cast to string first.
 *
 * @return string The escaped value surrounded by single quotes.
 */
function sqlesc($value) {
    global $mysqli;

    return "'" . $mysqli->real_escape_string((string) $value) . "'";
}
/**
 * Abort the request with an HTML-formatted MySQL error report.
 *
 * The file, line and error text are HTML-escaped before output, because the
 * error message can echo back parts of the failed statement.
 *
 * @param string     $file  File in which the failing query was issued.
 * @param string|int $line  Line number of the failing query.
 * @param string     $error Error text reported by the database driver.
 *
 * @return void This function never returns; it terminates the script via die().
 */
function sqlerr($file = "", $line = "", $error = "") {
    $escape = function ($text) {
        return htmlspecialchars((string) $text, ENT_QUOTES, 'UTF-8');
    };

    die("<b>MySQL Error:</b><br /><b>File:</b> ". $escape($file) ." <b>Line:</b> ". $escape($line) ."<br /><b>Error:</b> ". $escape($error));
}
- "host" => "localhost", // NOTE(review): the opening of this array is not in the visible listing; later lines read it as $mysql[...].
- "user" => "root",
- "password" => "123456", // Credentials are hard-coded in the script.
- "database" => "site",
- "charset" => "utf8"
- );
- $mysqli = @ new mysqli($mysql['host'], $mysql['user'], $mysql['password'], $mysql['database']); // Opens the connection; "@" silences any warning raised while connecting.
- @ $mysqli->query("SET NAMES " . $mysql['charset']); // Sets the connection character set; errors are silenced here as well.
- } // NOTE(review): unmatched closing brace in the visible listing - the opening construct seems to have been lost.
- $ccc = get_row_count("links WHERE categoryid = '" . $cat . "' AND used='no'"); // Counts links of this category still marked used='no'. NOTE(review): $cat is not assigned anywhere in the visible listing (only $categoryid is).
- $catnext = $cat + 1; // Id of the category to move on to.
- if($cat > 64) { // NOTE(review): the body of this branch is empty in the visible listing.
- }
- if($ccc == 0) { // Nothing left to process in this category: send the browser to the next one and stop.
- echo '<meta http-equiv="refresh" content="0;URL=3.php?cat=' . $catnext . '" />';
- die;
- }
- $result = $mysqli->query("SELECT * FROM links WHERE used = 'no' AND categoryid = '" . $categoryid . "' LIMIT 5") or sqlerr(__FILE__, __LINE__, $mysqli->error); // Fetches at most 5 unprocessed links per page load; $categoryid is concatenated into the SQL without escaping.
- while($row = $result->fetch_assoc()) {
- $urls[] = $row['link'];
- }
- foreach ($urls as $url) { // NOTE(review): $urls is never initialised, so it is undefined here when the query returns no rows.
- $url = "http://URL_SITE" . $url;
- $channels[$url] = $ch; // Handles are keyed by the full URL. NOTE(review): $ch is not created in the visible listing; the cURL handle setup appears to be missing.
- }
- $active = null;
- do { // NOTE(review): loop body is missing from the listing; $mrc is never assigned in the visible code.
- } while ($mrc == CURLM_CALL_MULTI_PERFORM);
- while ($active && $mrc == CURLM_OK) {
- continue; // NOTE(review): presumably guarded by a condition that was lost from the listing - confirm against the real script.
- }
- do { // NOTE(review): loop body is missing from the listing here too.
- } while ($mrc == CURLM_CALL_MULTI_PERFORM);
- } // NOTE(review): unmatched closing brace in the visible listing.
- foreach ($channels as $value => $channel ) { // $value is the key of $channels, i.e. the URL including the "http://URL_SITE" prefix.
- $result = preg_replace('#<noindex.*?</noindex>|<script.*?</script>|<ins.*?</ins>|<!--.*?-->#si', '', $tmp[1]); // Strips noindex/script/ins blocks and HTML comments; overwrites $result. NOTE(review): $tmp is not defined in the visible listing.
- foreach($urlsex[1] as $val) { // NOTE(review): $urlsex is not defined in the visible listing.
- $urlsex_end .= $val . "\n"; // Collects the values newline-separated; $urlsex_end is appended to without being initialised.
- }
- $mysqli->query("UPDATE links SET used = 'yes' WHERE link = " . sqlesc($value)) or sqlerr(__FILE__, __LINE__, $mysqli->error); // Marks the link as processed. NOTE(review): $value carries the "http://URL_SITE" prefix while `link` was read without it - verify that this UPDATE matches any row.
- }
- $a = get_row_count("links");
- $b = get_row_count("texts");
- $number1 = $a;
- $number2 = $b;
- $result = ($number2 / $number1) * 100; // Row count of texts as a percentage of links; divides by zero when links is empty. The value is not output in the visible listing.
- ?>
- <meta http-equiv="refresh" content="0;URL=3.php?cat=<?=$cat;?>" />