mirror of
https://codeberg.org/readeck/readeck.git
synced 2025-12-23 13:40:17 +00:00
Fixed arstechnica content-script
- the body selector is now div.post-content
- removed the now useless replaceStrings
- give priority to {Article,NewsArticle,WebPage}.description in JSON-LD,
before *.description
See https://community.readeck.org/d/113-some-webpages-content-not-being-extracted
This commit is contained in:
@@ -18,11 +18,11 @@ exports.isActive = function () {
|
||||
*/
|
||||
exports.setConfig = function (config) {
|
||||
switch (true) {
|
||||
case $.domain == "arstechnica.co.uk":
|
||||
$.overrideConfig(config, "https://arstechnica.com/")
|
||||
case $.domain == "arstechnica.com":
|
||||
config.replaceStrings = [
|
||||
['" data-src="', '"><img src="'],
|
||||
['" data-responsive="', '" /><span data-responsive="'],
|
||||
['<figure style="', '</span><figure data-style="'],
|
||||
config.bodySelectors = [
|
||||
"//div[contains(concat(' ',normalize-space(@class),' '),' post-content ')]",
|
||||
]
|
||||
break
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ func ExtractMeta(m *extract.ProcessMessage, next extract.Processor) extract.Proc
|
||||
if headline, ok := md.getProp("Article.name", "*.headline", "{Movie,VideObject}.name").(string); ok {
|
||||
d.Title = headline
|
||||
}
|
||||
if description, ok := md.getProp("*.description").(string); ok {
|
||||
if description, ok := md.getProp("{Article,NewsArticle,WebPage}.description", "*.description").(string); ok {
|
||||
d.Description = description
|
||||
}
|
||||
if image, ok := md.getProp("*.{image,image.url,thumbnailUrl}").(string); ok {
|
||||
|
||||
Reference in New Issue
Block a user