Property | Type | Description | |
---|---|---|---|
$readability |
Method | Description | |
---|---|---|
__construct ( array $config = [], Psr\Log\LoggerInterface $logger = null, |
||
buildSiteConfig ( string $url, string $html = '', boolean $addToCache = true ) : |
Returns SiteConfig instance (joined in order: exact match, wildcard, fingerprint, global, default). | |
findHostUsingFingerprints ( string $html ) : string | false | Try to find a host based on a meta tag that may be present in the HTML. | |
getContent ( ) | ||
getLanguage ( ) | ||
getNextPageUrl ( ) | ||
getSiteConfig ( ) | ||
getTitle ( ) | ||
process ( string $html, string $url, |
$smartTidy indicates that if tidy is used and no results are produced, we will try again without it. | |
reset ( ) | ||
setLogger ( Psr\Log\LoggerInterface $logger ) |
Method | Description | |
---|---|---|
extractBody ( boolean $detectBody, string $xpathExpression, DOMNode $node, string $type ) : boolean | Extract the body from a given XPath expression for a node. | |
extractTitle ( boolean $detectTitle, string $cssClass, DOMNode $node, string $logMessage ) : boolean | Extract the title for a given CSS class from a node. | |
hasElements ( DOMNodeList $elems ) : boolean | Check if the given node list exists and has a length greater than 0. | |
removeElements ( DOMNodeList $elems, string $logMessage = null ) | Remove elements. |
public __construct ( array $config = [], Psr\Log\LoggerInterface $logger = null, |
||
$config | array | |
$logger | Psr\Log\LoggerInterface | |
$configBuilder |
public findHostUsingFingerprints ( string $html ) : string | false | ||
$html | string | |
return | string | false |
public process ( string $html, string $url, |
||
$html | string | |
$url | string | |
$siteConfig | Avoids recalculating the site config | |
$smartTidy | boolean | Do we need to tidy the HTML? |
return | boolean | true on success, false on failure |
public setLogger ( Psr\Log\LoggerInterface $logger ) | ||
$logger | Psr\Log\LoggerInterface |