<?php
namespace App\Service\Client;
use App\Utils\StringUtils;
use App\Utils\Utils;
use Facebook\WebDriver\Chrome\ChromeOptions;
use Facebook\WebDriver\Remote\DesiredCapabilities;
use Symfony\Component\Panther\Client;
/**
* See Experimental/src/Headless project
* https://github.com/sjitech/proxy-login-automator
*/
class HeadlessUtils
{
    // NOTE(review): the three endpoints below embed what look like access tokens/credentials
    // directly in source control — consider moving them to environment configuration.
    public const BROWSERLESS_IO = 'https://4fa7d10a-0a31-405e-87f5-44b58636c682@chrome.browserless.io/webdriver';

    /**
     * Docker installation info in client.md
     */
    public const BROWSERLESS_VPS5_WEBDRIVER = 'http://madrid75008@51.38.44.123:3000/webdriver'; //using selenium
    public const BROWSERLESS_VPS5_FN = 'http://madrid75008@51.38.44.123:3000/function?ignoreHTTPSErrors'; //or ?token=aaaaaa

    /**
     * Fetches the HTML of a page with headless Chrome driven through Panther
     * (selenium/chromedriver protocol), either on the remote browserless server
     * or with a locally started Chrome.
     *
     * TODO handle post params
     *
     * @param string      $url        Page to load with a GET request.
     * @param array|null  $postParams Must be empty or null: POST is not implemented.
     * @param string|null $waitFor    CSS selector of an element that must be rendered before the DOM
     *                                is read (original note: timeout 30s).
     * @param bool        $isRemote   true: use createRemoteClient(); false: Client::createChromeClient().
     *
     * @return string HTML returned by the crawler.
     *
     * @throws \Exception When $postParams is not empty.
     */
    public static function fetchUsingPanther(string $url, ?array $postParams = [], ?string $waitFor = null, bool $isRemote = true): string
    {
        // empty() instead of count(): $postParams is nullable and count(null) is a TypeError on PHP 8.
        if (!empty($postParams)) {
            throw new \Exception('POST not implemented');
        }

        // To display chrome, in test, add `PANTHER_NO_HEADLESS=1` in environment variables
        $client = $isRemote ? self::createRemoteClient() : Client::createChromeClient();

        try {
            $crawler = $client->request('GET', $url);
            if ($waitFor) {
                $client->waitFor($waitFor); // Wait for an element to be rendered
            }

            return $crawler->html();
        } finally {
            // Always release the browser session, including when request()/waitFor() throws
            // (e.g. on timeout); otherwise the session is left open.
            $client->quit();
        }
    }

    /**
     * Creates a Panther client bound to the BROWSERLESS_VPS5_WEBDRIVER endpoint, asking for
     * Chrome with the 'headless', 'no-sandbox' and 'ignore-certificate-errors' arguments.
     */
    public static function createRemoteClient(): Client
    {
        $options = (new ChromeOptions())
            ->addArguments(['headless', 'no-sandbox', 'ignore-certificate-errors']);

        return Client::createSeleniumClient(
            self::BROWSERLESS_VPS5_WEBDRIVER,
            DesiredCapabilities::chrome()->setCapability(ChromeOptions::CAPABILITY, $options)
        );
    }

    /**
     * Fetches a page by running the bundled headless_fetch.js script through fetchCustom().
     *
     * @param string      $url             Sent to the script as the 'purl' parameter.
     * @param string|null $postQuerystring Sent to the script as the 'post' parameter.
     * @param string|null $waitFor         Sent to the script as the 'waitFor' parameter.
     * @param bool        $isRemote        Forwarded to fetchCustom().
     * @param string|null $proxyUrl        Forwarded to fetchCustom().
     *
     * @return string Output of the script.
     *
     * @throws \RuntimeException When the output starts with 'Navigation timeout of' or
     *                           'waiting for selector', i.e. the script reported a failure.
     */
    public static function fetch(string $url, ?string $postQuerystring = null, ?string $waitFor = null, bool $isRemote = true, ?string $proxyUrl = null): string
    {
        $html = self::fetchCustom(__DIR__ . '/headless_fetch.js', [
            'purl' => $url,
            'post' => $postQuerystring,
            'waitFor' => $waitFor,
        ], $isRemote, $proxyUrl);

        if (StringUtils::startsWith($html, 'Navigation timeout of') || StringUtils::startsWith($html, 'waiting for selector')) {
            throw new \RuntimeException('Browserless says: ' . $html);
        }

        return $html;
    }

    // TODO handle when script when in different folder
    /**
     * Runs a JS script either remotely (POSTed as JSON to BROWSERLESS_VPS5_FN) or locally
     * (through node and headless_custom_runner.js) and returns its output.
     *
     * @param string      $pathScript "/foo/bar/fetch.js"
     * @param array       $params     Remote: sent as the 'context' of the JSON payload.
     *                                Local: each entry becomes a `--key=value` command line argument.
     * @param bool        $isRemote   true to call the remote endpoint, false to run node locally.
     * @param string|null $proxyUrl   Remote only. Host/port are appended as `--proxy-server`, user/pass
     *                                (when both present) are added to $params as 'proxyAuth'.
     *                                Ignored when not remote.
     *
     * @return string Whatever ScraperUtil::curl() (remote) or Utils::exec() (local) returns.
     *
     * @throws \RuntimeException         When the script is missing/unreadable or the payload cannot be encoded.
     * @throws \InvalidArgumentException When $proxyUrl has no parsable host.
     */
    public static function fetchCustom(string $pathScript, array $params = [], bool $isRemote = true, ?string $proxyUrl = null): string
    {
        if (!file_exists($pathScript)) {
            throw new \RuntimeException("File $pathScript not found");
        }

        if (!$isRemote) { //Proxy not handled when not remote
            return Utils::exec(self::buildLocalCommand($pathScript, $params)); //works when running local server with symfony server:run
        }

        $endpoint = self::BROWSERLESS_VPS5_FN;
        if ($proxyUrl) {
            $parse = parse_url($proxyUrl);
            // parse_url() returns false on malformed input and omits 'host' for e.g. bare paths.
            // The URL itself is kept out of the message because it may carry credentials.
            if (!is_array($parse) || empty($parse['host'])) {
                throw new \InvalidArgumentException('Invalid proxy URL: no host could be parsed');
            }
            if (isset($parse['user'], $parse['pass'])) {
                $params['proxyAuth'] = [
                    'username' => $parse['user'],
                    'password' => $parse['pass'],
                ];
            }
            // proxy : https://www.browserless.io/docs/chrome-flags#launching-with-a-proxy
            $endpoint .= '&--proxy-server=' . $parse['host'] . (isset($parse['port']) ? ':' . $parse['port'] : '');
        }

        $code = file_get_contents($pathScript);
        if ($code === false) {
            throw new \RuntimeException("File $pathScript could not be read");
        }

        $payload = json_encode([
            'code' => $code,
            'context' => $params,
        ]);
        if ($payload === false) {
            throw new \RuntimeException('Unable to encode browserless payload: ' . json_last_error_msg());
        }

        return ScraperUtil::curl([
            CURLOPT_URL => $endpoint,
            CURLOPT_POSTFIELDS => $payload,
            CURLOPT_HTTPHEADER => [
                'Content-Type: application/json'
            ],
            CURLOPT_TIMEOUT => ScraperUtil::TIMEOUT_S * 2,
        ]);
    }

    /**
     * Builds the shell command that runs $pathScript through headless_custom_runner.js with node.
     *
     * Every argument goes through escapeshellarg() so that quotes, `$` or backticks inside a value
     * (CSS selectors such as a[href="x"], URLs, ...) cannot break or alter the command.
     */
    private static function buildLocalCommand(string $pathScript, array $params): string
    {
        //--inspect to be able to inspect/debug with chrome tool or Phpstorm (Run/Debug config > Attach to Node.js/Chrome)
        $parts = [
            'node --inspect',
            escapeshellarg(__DIR__ . '/headless_custom_runner.js'),
            escapeshellarg($pathScript),
        ];
        foreach ($params as $k => $v) {
            // Scalars and null keep PHP's string conversion (null => ''). Other values are sent as
            // JSON — NOTE(review): confirm the runner script expects JSON for structured values.
            $value = (is_scalar($v) || $v === null) ? (string) $v : (string) json_encode($v);
            $parts[] = escapeshellarg("--$k=" . $value);
        }

        return implode(' ', $parts);
    }
}