Method | Description | |
---|---|---|
import ( array $current_import ) : boolean | ||
kneadHtml ( string $html, string $type, string $domain ) : string | Pummel the HTML into WordPress-compatible dough. | |
kneadandInsert ( $html, string $post_type, integer $chapter_parent, string $domain ) | Pummel then insert HTML into our database | |
setCurrentImportOption ( array $upload ) : boolean |
Method | Description | |
---|---|---|
extractCCLicense ( string $url ) : string | Expects a URL string with Creative Commons domain similar in form to: http://creativecommons.org/licenses/by-sa/4.0/ | |
fetchAndSaveUniqueImage ( string $url ) : string | Extract URL and load into WP using media_handle_sideload(). Will return an empty string if something went wrong. | |
getAuthors ( string $html ) : array | Looks for meta data in the `<head>` section of an HTML document. | |
getLicenseAttribution ( string $html ) : array | Looks for div class created by the license module in PB, returns author and license information. | |
regexSearchReplace ( string $html ) : string | Cherry pick likely content areas, then cull known, unwanted content areas | |
scrapeAndKneadImages ( DOMDocument $doc, string $domain ) : DOMDocument | Parse HTML snippet, save all found `<img>` tags using media_handle_sideload(), return the HTML with changed paths. | |
scrapeAndKneadMeta ( DOMDocument $doc ) : array | Extracts section/book author and section/book license if they exist. | |
tidy ( string $html ) : string | Enforce compliance with XHTML standards and remove cruft generated by word processors. |
protected extractCCLicense ( string $url ) : string | ||
$url | string | |
return | string | license meta value |
protected fetchAndSaveUniqueImage ( string $url ) : string | ||
$url | string | |
return | string | $src |
protected getAuthors ( string $html ) : array | ||
$html | string | |
return | array | $authors |
protected getLicenseAttribution ( string $html ) : array | ||
$html | string | |
return | array | $meta |
protected regexSearchReplace ( string $html ) : string | ||
$html | string | |
return | string | $html |
protected scrapeAndKneadImages ( DOMDocument $doc, string $domain ) : DOMDocument | ||
$doc | DOMDocument | |
$domain | string | domain name of the webpage |
return | DOMDocument |
protected scrapeAndKneadMeta ( DOMDocument $doc ) : array | ||
$doc | DOMDocument | |
return | array | $meta |
public setCurrentImportOption ( array $upload ) : boolean | ||
$upload | array | |
return | boolean |