PHP Class Spatie\Crawler\Crawler

Show file Open project: spatie/crawler Class Usage Examples

Protected Properties

Property Type Description
$baseUrl Url
$baseUrl Spatie\Crawler\Url
$client GuzzleHttp\Client
$concurrency integer
$crawlObserver Spatie\Crawler\CrawlObserver
$crawlProfile Spatie\Crawler\CrawlProfile
$crawlQueue CrawlQueue
$crawledUrls Illuminate\Support\Collection

Public Methods

Method Description
__construct ( Client $client )
__construct ( Client $client, integer $concurrency = 10 )
create ( ) : static
create ( array $clientOptions = [] ) : static
setConcurrency ( integer $concurrency )
setCrawlObserver ( Spatie\Crawler\CrawlObserver $crawlObserver )
setCrawlProfile ( Spatie\Crawler\CrawlProfile $crawlProfile )
startCrawling ( Url | string $baseUrl )

Protected Methods

Method Description
addAllLinksToCrawlQueue ( string $html, Url $foundOnUrl )
crawlAllLinks ( string $html ) Crawl all links in the given html.
crawlUrl ( Url $url ) Crawl the given url.
extractAllLinks ( string $html ) : Collection
getAllLinks ( string $html ) : Url[] Get all links in the given html.
getCrawlRequests ( ) : Generator
handleResponse ( Psr\Http\Message\ResponseInterface | null $response, integer $index )
hasAlreadyCrawled ( Url $url ) : boolean Determine if the crawler has already crawled the given url.
normalizeUrl ( Url $url ) Normalize the given url.
normalizeUrl ( Url $url ) : Url
startCrawlingQueue ( )

Method Details

__construct() public method

public __construct ( Client $client )
$client GuzzleHttp\Client

__construct() public method

public __construct ( Client $client, integer $concurrency = 10 )
$client GuzzleHttp\Client
$concurrency integer

addAllLinksToCrawlQueue() protected method

protected addAllLinksToCrawlQueue ( string $html, Url $foundOnUrl )
$html string
$foundOnUrl Url

crawlUrl() protected method

Crawl the given url.
protected crawlUrl ( Url $url )
$url Url

create() public static method

public static create ( ) : static
return static

create() public static method

public static create ( array $clientOptions = [] ) : static
$clientOptions array
return static

getCrawlRequests() protected method

protected getCrawlRequests ( ) : Generator
return Generator

handleResponse() protected method

protected handleResponse ( Psr\Http\Message\ResponseInterface | null $response, integer $index )
$response Psr\Http\Message\ResponseInterface | null
$index integer

hasAlreadyCrawled() protected method

Determine if the crawler has already crawled the given url.
protected hasAlreadyCrawled ( Url $url ) : boolean
$url Url
return boolean

normalizeUrl() protected method

Normalize the given url.
protected normalizeUrl ( Url $url )
$url Url

normalizeUrl() protected method

protected normalizeUrl ( Url $url ) : Url
$url Url
return Url

setConcurrency() public method

public setConcurrency ( integer $concurrency )
$concurrency integer

setCrawlObserver() public method

public setCrawlObserver ( Spatie\Crawler\CrawlObserver $crawlObserver )
$crawlObserver Spatie\Crawler\CrawlObserver

setCrawlProfile() public method

public setCrawlProfile ( Spatie\Crawler\CrawlProfile $crawlProfile )
$crawlProfile Spatie\Crawler\CrawlProfile

startCrawling() public method

public startCrawling ( Url | string $baseUrl )
$baseUrl Url | string

startCrawlingQueue() protected method

protected startCrawlingQueue ( )

Property Details

$baseUrl protected property

protected Url,Spatie\Crawler $baseUrl
return Url

$baseUrl protected property

protected Url,Spatie\Crawler $baseUrl
return Spatie\Crawler\Url

$client protected property

protected Client,GuzzleHttp $client
return GuzzleHttp\Client

$concurrency protected property

protected int $concurrency
return integer

$crawlObserver protected property

protected CrawlObserver,Spatie\Crawler $crawlObserver
return Spatie\Crawler\CrawlObserver

$crawlProfile protected property

protected CrawlProfile,Spatie\Crawler $crawlProfile
return Spatie\Crawler\CrawlProfile

$crawlQueue protected property

protected CrawlQueue,Spatie\Crawler $crawlQueue
return CrawlQueue

$crawledUrls protected property

protected Collection,Illuminate\Support $crawledUrls
return Illuminate\Support\Collection