tor-spider/tor.php

54 lines
1.6 KiB
PHP

<?php
use GuzzleHttp\Client;
use GuzzleHttp\Handler\CurlHandler;
use GuzzleHttp\HandlerStack;
use GuzzleTor\Middleware;
use Symfony\Component\DomCrawler\Crawler;
require 'vendor/autoload.php';
/**
 * Print a single diagnostic line to standard output.
 *
 * The line has the form "Debug : <content>" followed by the platform's
 * end-of-line sequence.
 *
 * @param mixed $content Value to report; it is converted to a string.
 *
 * @return void
 */
function debug($content)
{
    $line = "Debug : {$content}" . PHP_EOL;
    echo $line;
}
/**
 * Download a hard-coded .onion page and save the absolute links it contains.
 *
 * A Guzzle client is built on a cURL handler with the GuzzleTor middleware
 * pushed onto its handler stack (presumably this routes the request through a
 * local Tor proxy -- confirm against the guzzle-tor documentation). The page
 * at http://skunksworkedp2cg.onion/sites.html is fetched, every <a> element is
 * inspected, and each href whose URL has a scheme is collected. The resulting
 * list is written as pretty-printed JSON to "elements.json", a path relative
 * to the current working directory.
 *
 * NOTE(review): the name is historical. An earlier version apparently queried
 * https://check.torproject.org/ and returned the IP address found in the page;
 * the function now returns nothing. The name is kept so existing callers work.
 *
 * Each step is traced through debug(); the traced strings mirror the statement
 * being executed.
 *
 * @return void
 *
 * @throws RuntimeException If the links cannot be encoded as JSON or if
 *                          elements.json cannot be written.
 */
function get_tor_ip()
{
    $stack = new HandlerStack();
    debug('$stack = new HandlerStack();');
    $stack->setHandler(new CurlHandler());
    debug('$stack->setHandler(new CurlHandler());');
    $stack->push(Middleware::tor());
    debug('$stack->push(MiddleWare::tor());');
    $client = new Client(['handler' => $stack]);
    debug('$client = new Client([\'handler\' => $stack]);');

    // Traced before the call so the log shows which request is in flight if it hangs or fails.
    debug('$response = $client->get(\'http://skunksworkedp2cg.onion/sites.html\');');
    $response = $client->get('http://skunksworkedp2cg.onion/sites.html');

    $crawler = new Crawler();
    debug('$crawler = new Crawler();');
    $crawler->addHtmlContent($response->getBody()->getContents());
    debug('$crawler->addHtmlContent($response->getBody()->getContents());');
    $links = $crawler->filter('a');
    debug('$links = $crawler->filter(\'a\');');
    debug('links filtered');

    $hrefs = [];
    foreach ($links as $domElement) {
        $href = $domElement->getAttribute('href');

        // parse_url() yields null when the URL has no scheme and false when the
        // URL is seriously malformed; only a non-empty string marks an absolute
        // link. Checking the type avoids passing null/false to strlen().
        $scheme = parse_url($href, PHP_URL_SCHEME);
        if (is_string($scheme) && $scheme !== '') {
            $hrefs[] = $href;
        }
    }

    // json_encode() returns false on failure (for example on invalid UTF-8);
    // writing that result would silently produce an empty file.
    $json = json_encode($hrefs, JSON_PRETTY_PRINT);
    if ($json === false) {
        throw new RuntimeException('Could not encode links as JSON: ' . json_last_error_msg());
    }

    if (file_put_contents('elements.json', $json) === false) {
        throw new RuntimeException('Could not write elements.json');
    }
}
// Script entry point: run the crawl immediately when this file is executed.
get_tor_ip();