tor-spider/tor.php

54 lines
1.6 KiB
PHP
Raw Permalink Normal View History

2016-12-21 12:57:48 -08:00
<?php
use GuzzleHttp\Client;
use GuzzleHttp\Handler\CurlHandler;
use GuzzleHttp\HandlerStack;
use GuzzleTor\Middleware;
2016-12-22 17:18:29 -08:00
use Symfony\Component\DomCrawler\Crawler;
2016-12-21 12:57:48 -08:00
require 'vendor/autoload.php';
2016-12-22 17:18:29 -08:00
/**
 * Write a single diagnostic line to standard output.
 *
 * The line consists of the fixed prefix "Debug : ", the given content and
 * the platform end-of-line sequence.
 *
 * @param mixed $content Value to display; it is converted to a string.
 */
function debug($content)
{
    $line = 'Debug : ' . $content . PHP_EOL;

    echo $line;
}
2016-12-21 12:57:48 -08:00
/**
 * Fetch a page through Tor and save the absolute links it contains.
 *
 * Builds a Guzzle client whose handler stack is a CurlHandler with the
 * GuzzleTor middleware pushed on top, requests
 * http://skunksworkedp2cg.onion/sites.html, collects the href of every <a>
 * element whose URL carries a scheme, and writes that list as pretty-printed
 * JSON to the relative path elements.json. Progress is reported via debug().
 *
 * Despite its name, this function does not return an IP address; it
 * returns nothing.
 *
 * Exceptions raised by the HTTP client or the crawler are not caught here
 * and propagate to the caller.
 *
 * @return void
 *
 * @throws \RuntimeException If the links cannot be encoded as JSON or
 *                           elements.json cannot be written.
 */
function get_tor_ip()
{
    $stack = new HandlerStack();
    debug('$stack = new HandlerStack();');
    $stack->setHandler(new CurlHandler());
    debug('$stack->setHandler(new CurlHandler());');
    $stack->push(Middleware::tor());
    debug('$stack->push(MiddleWare::tor());');

    $client = new Client(['handler' => $stack]);
    debug('$client = new Client([\'handler\' => $stack]);');

    debug('$response = $client->get(\'http://skunksworkedp2cg.onion/sites.html\');');
    $response = $client->get('http://skunksworkedp2cg.onion/sites.html');

    $crawler = new Crawler();
    debug('$crawler = new Crawler();');
    $crawler->addHtmlContent($response->getBody()->getContents());
    debug('$crawler->addHtmlContent($response->getBody()->getContents());');
    $links = $crawler->filter('a');
    debug('$links = $crawler->filter(\'a\');');
    debug('links filtered');

    $hrefs = [];
    foreach ($links as $domElement) {
        $href = $domElement->getAttribute('href');

        // parse_url() yields null when the URL has no scheme and false when
        // it is malformed. Test the type explicitly instead of handing those
        // values to strlen(), which is deprecated for null since PHP 8.1.
        $scheme = parse_url($href, PHP_URL_SCHEME);
        if (is_string($scheme) && '' !== $scheme) {
            $hrefs[] = $href;
        }
    }

    // json_encode() returns false on failure (e.g. invalid UTF-8 in a link);
    // fail loudly rather than writing an empty file.
    $json = json_encode($hrefs, JSON_PRETTY_PRINT);
    if (false === $json) {
        throw new \RuntimeException('Could not encode links as JSON: ' . json_last_error_msg());
    }

    if (false === file_put_contents('elements.json', $json)) {
        throw new \RuntimeException('Could not write elements.json');
    }
}
2016-12-22 17:18:29 -08:00
// Script entry point: invoke the crawl as soon as this file is executed.
get_tor_ip();