Finished example

modified:   composer.json
	modified:   composer.lock
	new file:   example.php
	modified:   src/Sikofitt/Tor/Collection/ImageCollection.php
	modified:   src/Sikofitt/Tor/TorClient.php
	modified:   tor-client/bin/tor
	modified:   tor-client/bin/tor-gencert
	modified:   tor-client/bin/tor-resolve
	modified:   tor-client/etc/tor/torrc.sample
	modified:   tor-client/share/doc/tor/tor.html
	modified:   tor-client/share/man/man1/tor.1
This commit is contained in:
R. Eric Wheeler 2016-12-23 19:33:21 -08:00
parent 7f0b08c9c8
commit 2a004a54be
11 changed files with 299 additions and 32 deletions

View File

@ -26,13 +26,18 @@
"symfony/process": "^2.8",
"symfony/dom-crawler": "^2.8",
"symfony/console": "^2.8",
"doctrine/collections": "^1.3"
"doctrine/collections": "^1.3",
"monolog/monolog": "^1.22"
},
"suggest": {
"ext-zip":"For unzipping tor zip on windows."
},
"config": {
"platform": {
"php": "5.5"
}
},
"require-dev": {
"phpunit/phpunit": "^4.8",
"friendsofphp/php-cs-fixer": "^2.0",

82
composer.lock generated
View File

@ -4,8 +4,8 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically"
],
"hash": "9ec1203744073948a67a86ddfbf1d1e2",
"content-hash": "83d61f647d294058c67c53ea5fab1dc9",
"hash": "7c842d90a1889fdc4f1a20870e228885",
"content-hash": "acbb85e6793692dab952a6d42f6f1618",
"packages": [
{
"name": "doctrine/collections",
@ -289,6 +289,84 @@
],
"time": "2016-03-26 03:44:56"
},
{
"name": "monolog/monolog",
"version": "1.22.0",
"source": {
"type": "git",
"url": "https://github.com/Seldaek/monolog.git",
"reference": "bad29cb8d18ab0315e6c477751418a82c850d558"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/Seldaek/monolog/zipball/bad29cb8d18ab0315e6c477751418a82c850d558",
"reference": "bad29cb8d18ab0315e6c477751418a82c850d558",
"shasum": ""
},
"require": {
"php": ">=5.3.0",
"psr/log": "~1.0"
},
"provide": {
"psr/log-implementation": "1.0.0"
},
"require-dev": {
"aws/aws-sdk-php": "^2.4.9 || ^3.0",
"doctrine/couchdb": "~1.0@dev",
"graylog2/gelf-php": "~1.0",
"jakub-onderka/php-parallel-lint": "0.9",
"php-amqplib/php-amqplib": "~2.4",
"php-console/php-console": "^3.1.3",
"phpunit/phpunit": "~4.5",
"phpunit/phpunit-mock-objects": "2.3.0",
"ruflin/elastica": ">=0.90 <3.0",
"sentry/sentry": "^0.13",
"swiftmailer/swiftmailer": "~5.3"
},
"suggest": {
"aws/aws-sdk-php": "Allow sending log messages to AWS services like DynamoDB",
"doctrine/couchdb": "Allow sending log messages to a CouchDB server",
"ext-amqp": "Allow sending log messages to an AMQP server (1.0+ required)",
"ext-mongo": "Allow sending log messages to a MongoDB server",
"graylog2/gelf-php": "Allow sending log messages to a GrayLog2 server",
"mongodb/mongodb": "Allow sending log messages to a MongoDB server via PHP Driver",
"php-amqplib/php-amqplib": "Allow sending log messages to an AMQP server using php-amqplib",
"php-console/php-console": "Allow sending log messages to Google Chrome",
"rollbar/rollbar": "Allow sending log messages to Rollbar",
"ruflin/elastica": "Allow sending log messages to an Elastic Search server",
"sentry/sentry": "Allow sending log messages to a Sentry server"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.0.x-dev"
}
},
"autoload": {
"psr-4": {
"Monolog\\": "src/Monolog"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Jordi Boggiano",
"email": "j.boggiano@seld.be",
"homepage": "http://seld.be"
}
],
"description": "Sends your logs to files, sockets, inboxes, databases and various web services",
"homepage": "http://github.com/Seldaek/monolog",
"keywords": [
"log",
"logging",
"psr-3"
],
"time": "2016-11-26 00:15:39"
},
{
"name": "psr/http-message",
"version": "1.0.1",

25
example.php Normal file
View File

@ -0,0 +1,25 @@
<?php
/**
 * Example: fetch a single page through TorClient, download every image it
 * references and dump the collected data to data.json in the current
 * working directory.
 *
 * Created by PhpStorm.
 * User: eric
 * Date: 12/23/16
 * Time: 4:19 PM
 */
require __DIR__ . '/vendor/autoload.php';

$client = new \Sikofitt\Tor\TorClient();

// Alternative targets kept for manual testing.
//$client->get('http://32b5oz2bbtn6gqj3.onion/index.php/Main_Page');
$client->get('http://4sy6ebszykvcv2n6.onion/');
//$client->get('https://de.indymedia.org/index.shtml');

$images = $client->images();
//dump($images);

// Initialise the accumulator up front: when the request failed or the page
// has no images the loop body never runs and $result would be undefined.
$result = array();
if (is_array($images) || $images instanceof \Traversable) {
    foreach ($images as $image) {
        $result[] = $image->toArray();
    }
}

dump($result);
dump(json_encode(serialize($result)));

// Each image record carries the raw binary body, which is not valid UTF-8.
// Plain json_encode() returns false for such input and an empty file would be
// written; JSON_PARTIAL_OUTPUT_ON_ERROR replaces the unencodable strings with
// null so the remaining fields (including the base64 copy) are kept.
$json = json_encode($result, JSON_PRETTY_PRINT | JSON_PARTIAL_OUTPUT_ON_ERROR);
if (false === $json) {
    fwrite(STDERR, 'Could not encode image data: ' . json_last_error_msg() . PHP_EOL);
    exit(1);
}
file_put_contents('data.json', $json);

View File

@ -9,28 +9,145 @@
namespace Sikofitt\Tor\Collection;
use Doctrine\Common\Collections\ArrayCollection;
use GuzzleHttp\ClientInterface;
use GuzzleHttp\Psr7\Uri;
use Monolog\Handler\StreamHandler;
use Monolog\Logger;
use Sikofitt\Tor\TorClient;
use Symfony\Component\DomCrawler\Crawler;
class ImageCollection
{
/**
* Page address the HTML was fetched from. Built from the constructor's $uri
* argument; images() uses its scheme and host to complete image sources
* that lack them.
*
* @var Uri
*/
private $uri;
/**
* The HTML document handed to the constructor; images() loads it into the
* crawler.
*
* @var string
*/
private $html;
/**
* Container for the found images. images() stores one record per
* downloaded image, keyed by the image's alt text (or its src when the
* alt text is empty).
*
* @var ArrayCollection
*/
private $images;
/**
* DOM crawler used to select the img elements of the document.
*
* @var Crawler
*/
private $crawler;
public function __construct($html)
/**
* HTTP client used by images() to download each image.
*
* @var ClientInterface
*/
private $client;
/**
* Logger created in the constructor; writes DEBUG and above to
* php://stdout.
*
* @var Logger
*/
private $logger;
/**
* Number of seconds sleep() waits after each downloaded image. Despite the
* name this is a delay between requests, not a request timeout.
*
* @var int
*/
private $timeout;
/**
 * Prepares a collector for the images referenced by one HTML page.
 *
 * Nothing is parsed or downloaded here; that happens in images().
 *
 * @param string          $uri     Address the HTML was retrieved from.
 * @param string          $html    The HTML document to scan for img elements.
 * @param ClientInterface $client  HTTP client used to download the images.
 * @param int             $timeout Seconds to wait after each downloaded image.
 */
public function __construct($uri, $html, ClientInterface $client, $timeout = 10)
{
    // Logger first, fully configured before it is stored on the instance.
    $stdoutLogger = new Logger('tor-spider-ImageCollection');
    $stdoutLogger->pushHandler(new StreamHandler('php://stdout', Logger::DEBUG));
    $this->logger = $stdoutLogger;

    // Inputs supplied by the caller.
    $this->uri = new Uri($uri);
    $this->html = $html;
    $this->client = $client;
    $this->timeout = $timeout;

    // Working state filled in by images().
    $this->crawler = new Crawler();
    $this->images = new ArrayCollection();
}
/**
 * Blocks for $this->timeout seconds, one second at a time.
 *
 * A debug line is logged at the start, at the halfway mark (rounded up),
 * with two seconds and one second remaining, and once the wait is over.
 */
private function sleep()
{
    $this->logger->debug('sleeping for ' . $this->timeout . ' seconds.');

    // Remaining-second values at which a countdown message is emitted.
    $checkpoints = array(ceil($this->timeout / 2), 2, 1);

    $remaining = $this->timeout;
    while ($remaining >= 1) {
        if (in_array($remaining, $checkpoints)) {
            $this->logger->debug($remaining . ' Seconds ...');
        }
        sleep(1);
        $remaining--;
    }

    $this->logger->debug('Continuing ...');
}
/**
 * Downloads every distinct image referenced by the page.
 *
 * Each img element's src is completed with the page's scheme and host when
 * it lacks them, fetched through the HTTP client and stored as a record with
 * the keys alt, fqd (the URL actually requested), src, raw (response body)
 * and base64. Sources already seen, 404 responses and failed requests are
 * skipped. sleep() is called after every stored image.
 *
 * NOTE(review): records are keyed by alt text, so two different images that
 * share the same alt overwrite each other - confirm whether that is intended.
 *
 * @return ArrayCollection Records keyed by alt text (or by src when the alt
 *                         text is empty).
 */
public function images()
{
    $this->crawler->addHtmlContent($this->html);

    // Sources already handled, used as a set (src => true). A keyed lookup
    // avoids the loose comparison of in_array(), under which numeric-looking
    // strings such as "1" and "1.0" count as equal, and it stays O(1).
    $seen = array();

    foreach ($this->crawler->filter('img') as $image) {
        $src = $image->getAttribute('src');

        if (isset($seen[$src])) {
            $this->logger->debug('Aready have source skipping ...', array('url' => $src));
            continue;
        }
        $seen[$src] = true;

        $alt = $image->getAttribute('alt');
        if (0 === strlen($alt)) {
            $this->logger->debug('Setting alt to image src', array('src' => $src));
            $alt = $src;
        } else {
            $this->logger->debug('Found alt ... was ' . $alt);
        }

        try {
            // Parsing happens inside the try block so that one malformed src
            // skips that image instead of aborting the whole page.
            $uri = new Uri($src);
            if (0 === strlen($uri->getScheme())) {
                $uri = $uri->withScheme($this->uri->getScheme());
            }
            if (0 === strlen($uri->getHost())) {
                // A source without a host lives on the page's own server, so
                // the page's port applies as well.
                $uri = $uri->withHost($this->uri->getHost())
                    ->withPort($this->uri->getPort());
            }

            // Build the request URL once. The port and the query string are
            // kept so sources such as "image.php?id=3" or hosts on a
            // non-default port resolve to the right resource.
            $authority = $uri->getHost();
            if (null !== $uri->getPort()) {
                $authority .= ':' . $uri->getPort();
            }
            $url = sprintf('%s://%s/%s', $uri->getScheme(), $authority, ltrim($uri->getPath(), '/'));
            if (0 !== strlen($uri->getQuery())) {
                $url .= '?' . $uri->getQuery();
            }

            $response = $this->client->get($url);
            if (404 === $response->getStatusCode()) {
                $this->logger->debug('Got 404 skipping ... ', array('url' => $url));
                continue;
            }
            $raw = $response->getBody()->getContents();
        } catch (\Exception $e) {
            // The logger already writes to stdout, so no separate print.
            $this->logger->error($e->getMessage(), $e->getTrace());
            continue;
        }

        $this->images->set($alt, array(
            'alt' => $alt,
            'fqd' => $url,
            'src' => $src,
            'raw' => $raw,
            'base64' => base64_encode($raw),
        ));

        $this->sleep();
    }

    return $this->images;
}

View File

@ -9,11 +9,16 @@
namespace Sikofitt\Tor;
use Doctrine\Common\Collections\ArrayCollection;
use GuzzleHttp\Client;
use GuzzleHttp\ClientInterface;
use GuzzleHttp\Handler\CurlMultiHandler;
use GuzzleHttp\HandlerStack;
use GuzzleHttp\Psr7\Stream;
use GuzzleTor\Middleware;
use Monolog\Handler\StreamHandler;
use Monolog\Handler\SyslogHandler;
use Monolog\Logger;
use Sikofitt\Tor\Collection\ImageCollection;
use Sikofitt\Tor\Exception\BadProxyUrlException;
@ -22,7 +27,7 @@ use Sikofitt\Tor\Exception\BadProxyUrlException;
*
* @package Sikofitt\Tor
*/
class TorClient extends Client implements ClientInterface
class TorClient
{
/**
@ -49,19 +54,27 @@ class TorClient extends Client implements ClientInterface
* @var Middleware
*/
private $middleware;
// NOTE(review): not referenced in the visible code; presumably holds pool() results - confirm.
private $poolData;
// Fetched pages. Starts as an ArrayCollection; get() stores each body under its URI, or replaces the whole value with the result of pool().
private $htmlData;
// Image results: assigned by setImages() and appended to by images().
private $images;
// Monolog logger set up in the constructor.
private $logger;
/**
 * Sets up logging, the Tor middleware, the handler stack and the HTTP client.
 *
 * @param string $proxy      Address handed to Middleware::tor() as the proxy.
 * @param string $torControl Address handed to Middleware::tor() as the Tor
 *                           control endpoint.
 */
public function __construct(
    $proxy = '127.0.0.1:9050',
    $torControl = '127.0.0.1:9051'
) {
    $this->logger = new Logger('tor-spider');
    $this->logger->pushHandler(new StreamHandler('php://stdout', Logger::DEBUG));
    // The level belongs to the SyslogHandler constructor (ident, facility,
    // level). It used to be passed to pushHandler(), which ignores it.
    $this->logger->pushHandler(new SyslogHandler('tor-spider', LOG_USER, Logger::DEBUG));

    $this->proxy = $proxy;
    $this->torControl = $torControl;
    $this->htmlData = new ArrayCollection();

    $this->logger->debug('Setting up tor handler');
    // Order matters: the middleware is created first, then the handler stack,
    // and finally the client that is configured with that stack.
    $this->setTorMiddleWare();
    $this->createHandlerStack();
    $this->setClient();
    $this->logger->debug('Finished');
}
/**
@ -69,7 +82,9 @@ class TorClient extends Client implements ClientInterface
*/
public function setTorMiddleWare()
{
    $log = $this->logger;

    // Build the middleware from the configured proxy and control addresses,
    // logging before and after.
    $log->debug('Setting up tor middleware.');
    $this->middleware = Middleware::tor($this->proxy, $this->torControl);
    $log->debug('Finished');

    // Fluent interface.
    return $this;
}
@ -96,12 +111,33 @@ class TorClient extends Client implements ClientInterface
$this->client = new Client([
'verify' => false,
'handler' => $this->handlerStack,
'allow_redirects' => true,
'max_redirects' => 20,
]);
return $this;
}
/**
 * Fetches one URI, or hands a batch of URIs to pool().
 *
 * For a single URI the response body is stored in the htmlData collection
 * under that URI. For an array or object the whole htmlData value is
 * replaced by whatever pool() returns; $options is not used in that case.
 * Request failures are logged, not thrown.
 *
 * @param string|array|object $uris    A single URI, or a batch for pool().
 * @param array               $options Request options for the single-URI case.
 *
 * @return null Always null.
 */
public function get($uris, array $options = [])
{
    // Batch request: delegate and stop here.
    if (is_array($uris) || is_object($uris)) {
        $this->htmlData = $this->pool($uris);

        return null;
    }

    try {
        $this->logger->debug('Requesting ' . $uris . ' ...');
        $body = $this->client->get($uris, $options)->getBody();
        $this->htmlData->set($uris, $body->getContents());
        $this->logger->debug('html retrieved', array('uri' => $uris));
    } catch (\Exception $e) {
        $this->logger->error($e->getMessage(), $e->getTrace());
    }

    return null;
}
public function setImages($images)
{
$this->images = $images;
@ -200,10 +236,16 @@ class TorClient extends Client implements ClientInterface
return $this;
}
/**
 * Collects the images of every page fetched so far.
 *
 * One ImageCollection is run per entry of htmlData, with a delay of
 * 5 seconds between image downloads, and its result is appended to the
 * images list.
 *
 * @return array|\ArrayAccess The images list; an empty array when no page
 *                            has been fetched and none was set via setImages().
 */
public function images()
{
    // Ensure callers always get something iterable, even when htmlData is
    // empty and setImages() was never called.
    if (null === $this->images) {
        $this->images = array();
    }

    foreach ($this->htmlData as $uri => $html) {
        $collection = new ImageCollection($uri, $html, $this->client, 5);
        $this->images[] = $collection->images();
    }

    return $this->images;
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -34,10 +34,10 @@
## We advise using "notice" in most cases, since anything more verbose
## may provide sensitive information to an attacker who obtains the logs.
##
## Send all messages of level 'notice' or higher to /home/eric/projects/tor/tor-0.2.9.8/../tor-client/var/log/tor/notices.log
#Log notice file /home/eric/projects/tor/tor-0.2.9.8/../tor-client/var/log/tor/notices.log
## Send every possible message to /home/eric/projects/tor/tor-0.2.9.8/../tor-client/var/log/tor/debug.log
#Log debug file /home/eric/projects/tor/tor-0.2.9.8/../tor-client/var/log/tor/debug.log
## Send all messages of level 'notice' or higher to /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/var/log/tor/notices.log
#Log notice file /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/var/log/tor/notices.log
## Send every possible message to /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/var/log/tor/debug.log
#Log debug file /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/var/log/tor/debug.log
## Use the system log instead of Tor's logfiles
#Log notice syslog
## To send all messages to stderr:
@ -50,7 +50,7 @@
## The directory for keeping all the keys/etc. By default, we store
## things in $HOME/.tor on Unix, and in Application Data\tor on Windows.
#DataDirectory /home/eric/projects/tor/tor-0.2.9.8/../tor-client/var/lib/tor
#DataDirectory /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/var/lib/tor
## The port on which Tor will listen for local connections from Tor
## controller applications, as documented in control-spec.txt.
@ -69,10 +69,10 @@
## HiddenServicePort x y:z says to redirect requests on port x to the
## address y:z.
#HiddenServiceDir /home/eric/projects/tor/tor-0.2.9.8/../tor-client/var/lib/tor/hidden_service/
#HiddenServiceDir /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/var/lib/tor/hidden_service/
#HiddenServicePort 80 127.0.0.1:80
#HiddenServiceDir /home/eric/projects/tor/tor-0.2.9.8/../tor-client/var/lib/tor/other_hidden_service/
#HiddenServiceDir /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/var/lib/tor/other_hidden_service/
#HiddenServicePort 80 127.0.0.1:80
#HiddenServicePort 22 127.0.0.1:22
@ -147,7 +147,7 @@
## can explain what Tor is if anybody wonders why your IP address is
## contacting them. See contrib/tor-exit-notice.html in Tor's source
## distribution for a sample.
#DirPortFrontPage /home/eric/projects/tor/tor-0.2.9.8/../tor-client/etc/tor/tor-exit-notice.html
#DirPortFrontPage /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/etc/tor/tor-exit-notice.html
## Uncomment this if you run more than one Tor relay, and add the identity
## key fingerprint of each Tor relay you control, even if they're on

View File

@ -786,7 +786,7 @@ Project&#8217;s website.</p></div>
<p>
Specify a new configuration file to contain further Tor configuration
options OR pass <strong>-</strong> to make Tor read its configuration from standard
input. (Default: /home/eric/projects/tor/tor-0.2.9.8/../tor-client/etc/tor/torrc, or $HOME/.torrc if that file is not
input. (Default: /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/etc/tor/torrc, or $HOME/.torrc if that file is not
found)
</p>
</dd>
@ -807,7 +807,7 @@ Project&#8217;s website.</p></div>
Specify a file in which to find default values for Tor options. The
contents of this file are overridden by those in the regular
configuration file, and by those on the command line. (Default:
/home/eric/projects/tor/tor-0.2.9.8/../tor-client/etc/tor/torrc-defaults.)
/home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/etc/tor/torrc-defaults.)
</p>
</dd>
<dt class="hdlist1">
@ -1374,7 +1374,7 @@ forward slash (/) in the configuration file and on the command line.</p></div>
</dt>
<dd>
<p>
Store working data in DIR (Default: /home/eric/projects/tor/tor-0.2.9.8/../tor-client/var/lib/tor)
Store working data in DIR (Default: /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/var/lib/tor)
</p>
</dd>
<dt class="hdlist1">
@ -5248,7 +5248,7 @@ TestingEnableTbEmptyEvent 1</code></pre>
<div class="sectionbody">
<div class="dlist"><dl>
<dt class="hdlist1">
<strong>/home/eric/projects/tor/tor-0.2.9.8/../tor-client/etc/tor/torrc</strong>
<strong>/home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/etc/tor/torrc</strong>
</dt>
<dd>
<p>
@ -5260,11 +5260,11 @@ TestingEnableTbEmptyEvent 1</code></pre>
</dt>
<dd>
<p>
Fallback location for torrc, if /home/eric/projects/tor/tor-0.2.9.8/../tor-client/etc/tor/torrc is not found.
Fallback location for torrc, if /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/etc/tor/torrc is not found.
</p>
</dd>
<dt class="hdlist1">
<strong>/home/eric/projects/tor/tor-0.2.9.8/../tor-client/var/lib/tor/</strong>
<strong>/home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/var/lib/tor/</strong>
</dt>
<dd>
<p>

View File

@ -50,7 +50,7 @@ Display a short help message and exit\&.
.RS 4
Specify a new configuration file to contain further Tor configuration options OR pass
\fB\-\fR
to make Tor read its configuration from standard input\&. (Default: /home/eric/projects/tor/tor-0.2.9.8/../tor-client/etc/tor/torrc, or $HOME/\&.torrc if that file is not found)
to make Tor read its configuration from standard input\&. (Default: /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/etc/tor/torrc, or $HOME/\&.torrc if that file is not found)
.RE
.PP
\fB\-\-allow\-missing\-torrc\fR
@ -62,7 +62,7 @@ exist if default torrc can be accessed\&.
.PP
\fB\-\-defaults\-torrc\fR \fIFILE\fR
.RS 4
Specify a file in which to find default values for Tor options\&. The contents of this file are overridden by those in the regular configuration file, and by those on the command line\&. (Default: /home/eric/projects/tor/tor-0.2.9.8/../tor-client/etc/tor/torrc\-defaults\&.)
Specify a file in which to find default values for Tor options\&. The contents of this file are overridden by those in the regular configuration file, and by those on the command line\&. (Default: /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/etc/tor/torrc\-defaults\&.)
.RE
.PP
\fB\-\-ignore\-missing\-torrc\fR
@ -373,7 +373,7 @@ If this option is set to 0, don\(cqt allow the filesystem group to read the cont
.PP
\fBDataDirectory\fR \fIDIR\fR
.RS 4
Store working data in DIR (Default: /home/eric/projects/tor/tor-0.2.9.8/../tor-client/var/lib/tor)
Store working data in DIR (Default: /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/var/lib/tor)
.RE
.PP
\fBDataDirectoryGroupReadable\fR \fB0\fR|\fB1\fR
@ -2547,17 +2547,17 @@ If this signal exists on your platform, Tor catches and ignores it\&.
.RE
.SH "FILES"
.PP
\fB/home/eric/projects/tor/tor-0.2.9.8/../tor-client/etc/tor/torrc\fR
\fB/home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/etc/tor/torrc\fR
.RS 4
The configuration file, which contains "option value" pairs\&.
.RE
.PP
\fB$HOME/\&.torrc\fR
.RS 4
Fallback location for torrc, if /home/eric/projects/tor/tor-0.2.9.8/../tor-client/etc/tor/torrc is not found\&.
Fallback location for torrc, if /home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/etc/tor/torrc is not found\&.
.RE
.PP
\fB/home/eric/projects/tor/tor-0.2.9.8/../tor-client/var/lib/tor/\fR
\fB/home/eric/projects/php/tor-spider/tor-0.2.9.8/../tor-client/var/lib/tor/\fR
.RS 4
The tor process stores keys and other data here\&.
.RE