mirror of
https://codeberg.org/readeck/readeck.git
synced 2025-12-22 05:07:08 +00:00
Merge branch 'release'
This commit is contained in:
23
pkg/extract/contentscripts/assets/site-config/as-web.jp.json
Normal file
23
pkg/extract/contentscripts/assets/site-config/as-web.jp.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"title_selectors": null,
|
||||
"body_selectors": null,
|
||||
"date_selectors": null,
|
||||
"author_selectors": null,
|
||||
"strip_selectors": null,
|
||||
"strip_id_or_class": null,
|
||||
"strip_image_src": null,
|
||||
"native_ad_selectors": null,
|
||||
"tidy": false,
|
||||
"prune": false,
|
||||
"autodetect_on_failure": true,
|
||||
"single_page_link_selectors": null,
|
||||
"next_page_link_selectors": null,
|
||||
"replace_strings": null,
|
||||
"http_headers": null,
|
||||
"tests": [
|
||||
{
|
||||
"url": "https://www.as-web.jp/f1/1275289",
|
||||
"contains": []
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,49 @@
|
||||
{
|
||||
"title_selectors": [
|
||||
"substring-before(//meta[@property='og:title']/@content , ' | Business Insider Japan')"
|
||||
],
|
||||
"body_selectors": [
|
||||
"//div[contains(concat(' ',normalize-space(@class),' '),' article_pArticle_Body ')]"
|
||||
],
|
||||
"date_selectors": null,
|
||||
"author_selectors": null,
|
||||
"strip_selectors": null,
|
||||
"strip_id_or_class": [
|
||||
"bws-avoid",
|
||||
"displayhandler_sm__stLxT",
|
||||
"displayhandler_lg__cvCnu"
|
||||
],
|
||||
"strip_image_src": null,
|
||||
"native_ad_selectors": null,
|
||||
"tidy": false,
|
||||
"prune": false,
|
||||
"autodetect_on_failure": true,
|
||||
"single_page_link_selectors": null,
|
||||
"next_page_link_selectors": [
|
||||
"//a[contains(text(),\"次ページ\")]"
|
||||
],
|
||||
"replace_strings": null,
|
||||
"http_headers": null,
|
||||
"tests": [
|
||||
{
|
||||
"url": "https://www.businessinsider.jp/article/2511-mobile-4-carriers-strategy-analysis/",
|
||||
"contains": []
|
||||
},
|
||||
{
|
||||
"url": "https://www.businessinsider.jp/article/2511-canon-semiconductor-business/",
|
||||
"contains": []
|
||||
},
|
||||
{
|
||||
"url": "https://www.businessinsider.jp/article/2511-google-company-turnaround-moment-reasons-ai-race-gemini/",
|
||||
"contains": []
|
||||
},
|
||||
{
|
||||
"url": "https://www.businessinsider.jp/article/2511-google-deepmind-cracks-century-old-physics-mystery-ai-fluid-dynamics/",
|
||||
"contains": []
|
||||
},
|
||||
{
|
||||
"url": "https://www.businessinsider.jp/article/2511-ai-bubble-dot-com-crash-data-centers-infrastructure-jeremy-siegel/",
|
||||
"contains": []
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"title_selectors": [
|
||||
"substring-before(//meta[@property='og:title']/@content , ' | 毎日新聞')"
|
||||
],
|
||||
"body_selectors": null,
|
||||
"date_selectors": null,
|
||||
"author_selectors": null,
|
||||
"strip_selectors": null,
|
||||
"strip_id_or_class": [
|
||||
"ad-articledetail-2"
|
||||
],
|
||||
"strip_image_src": null,
|
||||
"native_ad_selectors": null,
|
||||
"tidy": false,
|
||||
"prune": false,
|
||||
"autodetect_on_failure": true,
|
||||
"single_page_link_selectors": null,
|
||||
"next_page_link_selectors": null,
|
||||
"replace_strings": null,
|
||||
"http_headers": null,
|
||||
"tests": [
|
||||
{
|
||||
"url": "https://mainichi.jp/articles/20251122/k00/00m/010/037000c",
|
||||
"contains": []
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"title_selectors": null,
|
||||
"body_selectors": [
|
||||
"//div[@class=\"content-area\"]"
|
||||
],
|
||||
"date_selectors": null,
|
||||
"author_selectors": null,
|
||||
"strip_selectors": null,
|
||||
"strip_id_or_class": [
|
||||
"cmp-lst016",
|
||||
"cmp-misc010",
|
||||
"cmp-hdg005"
|
||||
],
|
||||
"strip_image_src": null,
|
||||
"native_ad_selectors": null,
|
||||
"tidy": false,
|
||||
"prune": false,
|
||||
"autodetect_on_failure": true,
|
||||
"single_page_link_selectors": null,
|
||||
"next_page_link_selectors": null,
|
||||
"replace_strings": null,
|
||||
"http_headers": null,
|
||||
"tests": [
|
||||
{
|
||||
"url": "https://www.tokyo-np.co.jp/article/454831",
|
||||
"contains": []
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -13,6 +13,7 @@ import (
|
||||
"log/slog"
|
||||
"mime"
|
||||
"net/http"
|
||||
"net/netip"
|
||||
"net/url"
|
||||
"path"
|
||||
"regexp"
|
||||
@@ -81,19 +82,53 @@ func NewDrop(src *url.URL) *Drop {
|
||||
|
||||
// SetURL sets the Drop's URL and Domain properties in their unicode versions.
|
||||
func (d *Drop) SetURL(src *url.URL) {
|
||||
// First, copy url and ensure it's a unicode version
|
||||
var uri *url.URL
|
||||
domain := ""
|
||||
if src != nil {
|
||||
uri = new(url.URL)
|
||||
*uri = *src
|
||||
if src == nil {
|
||||
d.URL = nil
|
||||
d.Domain = ""
|
||||
d.Site = ""
|
||||
return
|
||||
}
|
||||
|
||||
uri := new(url.URL)
|
||||
*uri = *src
|
||||
|
||||
// Remove port when it's not needed
|
||||
// Note: only numeric ports are valid in [url.URL].
|
||||
port := uri.Port()
|
||||
if uri.Scheme == "http" && port == "80" || uri.Scheme == "https" && port == "443" {
|
||||
port = ""
|
||||
// we want to keep the brackets on ipv6 here
|
||||
uri.Host = uri.Host[:strings.LastIndexByte(uri.Host, ':')]
|
||||
}
|
||||
|
||||
hostname := uri.Hostname()
|
||||
|
||||
if ip, err := netip.ParseAddr(hostname); err == nil {
|
||||
// Hostname is an IP address. Shorten the address and use it as the domain.
|
||||
s := ip.String()
|
||||
if ip.Is6() {
|
||||
uri.Host = "[" + s + "]"
|
||||
} else {
|
||||
uri.Host = s
|
||||
}
|
||||
if port != "" {
|
||||
uri.Host += ":" + port
|
||||
}
|
||||
|
||||
d.Domain = s
|
||||
} else {
|
||||
// Always encode the URL to unicode
|
||||
if host, err := idna.ToUnicode(uri.Host); err == nil {
|
||||
uri.Host = host
|
||||
}
|
||||
domain, _ = publicsuffix.EffectiveTLDPlusOne(uri.Hostname())
|
||||
d.Domain, _ = publicsuffix.EffectiveTLDPlusOne(uri.Hostname())
|
||||
}
|
||||
|
||||
if d.Domain == "" {
|
||||
d.Domain = hostname
|
||||
}
|
||||
|
||||
d.URL = uri
|
||||
d.Domain = domain
|
||||
}
|
||||
|
||||
// Load loads the remote URL and retrieve data.
|
||||
|
||||
@@ -97,6 +97,26 @@ func TestDrop(t *testing.T) {
|
||||
"http://example.net/test/test",
|
||||
"example.net",
|
||||
},
|
||||
{
|
||||
"http://example.net:8888/test/test",
|
||||
"http://example.net:8888/test/test",
|
||||
"example.net",
|
||||
},
|
||||
{
|
||||
"http://example.net:80/test/test",
|
||||
"http://example.net/test/test",
|
||||
"example.net",
|
||||
},
|
||||
{
|
||||
"http://example.net:80/test/test",
|
||||
"http://example.net/test/test",
|
||||
"example.net",
|
||||
},
|
||||
{
|
||||
"https://example.net:443/test/test",
|
||||
"https://example.net/test/test",
|
||||
"example.net",
|
||||
},
|
||||
{
|
||||
"http://belgië.icom.museum/€test",
|
||||
"http://belgië.icom.museum/€test",
|
||||
@@ -117,6 +137,31 @@ func TestDrop(t *testing.T) {
|
||||
"http://example.co.jp",
|
||||
"example.co.jp",
|
||||
},
|
||||
{
|
||||
"http://127.0.0.1:5000",
|
||||
"http://127.0.0.1:5000",
|
||||
"127.0.0.1",
|
||||
},
|
||||
{
|
||||
"http://[fd66:2244:0::0:1]:5000",
|
||||
"http://[fd66:2244::1]:5000",
|
||||
"fd66:2244::1",
|
||||
},
|
||||
{
|
||||
"http://[::1]/",
|
||||
"http://[::1]/",
|
||||
"::1",
|
||||
},
|
||||
{
|
||||
"http://[::1]:80/",
|
||||
"http://[::1]/",
|
||||
"::1",
|
||||
},
|
||||
{
|
||||
"https://[fd66:8282::a]:443/",
|
||||
"https://[fd66:8282::a]/",
|
||||
"fd66:8282::a",
|
||||
},
|
||||
}
|
||||
|
||||
for _, x := range tests {
|
||||
|
||||
24
web/package-lock.json
generated
24
web/package-lock.json
generated
@@ -3087,9 +3087,9 @@
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/baseline-browser-mapping": {
|
||||
"version": "2.9.5",
|
||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.5.tgz",
|
||||
"integrity": "sha512-D5vIoztZOq1XM54LUdttJVc96ggEsIfju2JBvht06pSzpckp3C7HReun67Bghzrtdsq9XdMGbSSB3v3GhMNmAA==",
|
||||
"version": "2.9.10",
|
||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.10.tgz",
|
||||
"integrity": "sha512-2VIKvDx8Z1a9rTB2eCkdPE5nSe28XnA+qivGnWHoB40hMMt/h1hSz0960Zqsn6ZyxWXUie0EBdElKv8may20AA==",
|
||||
"license": "Apache-2.0",
|
||||
"bin": {
|
||||
"baseline-browser-mapping": "dist/cli.js"
|
||||
@@ -3285,9 +3285,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/caniuse-lite": {
|
||||
"version": "1.0.30001760",
|
||||
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001760.tgz",
|
||||
"integrity": "sha512-7AAMPcueWELt1p3mi13HR/LHH0TJLT11cnwDJEs3xA4+CK/PLKeO9Kl1oru24htkyUKtkGCvAx4ohB0Ttry8Dw==",
|
||||
"version": "1.0.30001761",
|
||||
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001761.tgz",
|
||||
"integrity": "sha512-JF9ptu1vP2coz98+5051jZ4PwQgd2ni8A+gYSN7EA7dPKIMf0pDlSUxhdmVOaV3/fYK5uWBkgSXJaRLr4+3A6g==",
|
||||
"funding": [
|
||||
{
|
||||
"type": "opencollective",
|
||||
@@ -12154,9 +12154,9 @@
|
||||
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA=="
|
||||
},
|
||||
"baseline-browser-mapping": {
|
||||
"version": "2.9.5",
|
||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.5.tgz",
|
||||
"integrity": "sha512-D5vIoztZOq1XM54LUdttJVc96ggEsIfju2JBvht06pSzpckp3C7HReun67Bghzrtdsq9XdMGbSSB3v3GhMNmAA=="
|
||||
"version": "2.9.10",
|
||||
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.9.10.tgz",
|
||||
"integrity": "sha512-2VIKvDx8Z1a9rTB2eCkdPE5nSe28XnA+qivGnWHoB40hMMt/h1hSz0960Zqsn6ZyxWXUie0EBdElKv8may20AA=="
|
||||
},
|
||||
"binary-extensions": {
|
||||
"version": "2.3.0",
|
||||
@@ -12288,9 +12288,9 @@
|
||||
}
|
||||
},
|
||||
"caniuse-lite": {
|
||||
"version": "1.0.30001760",
|
||||
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001760.tgz",
|
||||
"integrity": "sha512-7AAMPcueWELt1p3mi13HR/LHH0TJLT11cnwDJEs3xA4+CK/PLKeO9Kl1oru24htkyUKtkGCvAx4ohB0Ttry8Dw=="
|
||||
"version": "1.0.30001761",
|
||||
"resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001761.tgz",
|
||||
"integrity": "sha512-JF9ptu1vP2coz98+5051jZ4PwQgd2ni8A+gYSN7EA7dPKIMf0pDlSUxhdmVOaV3/fYK5uWBkgSXJaRLr4+3A6g=="
|
||||
},
|
||||
"chalk": {
|
||||
"version": "4.1.2",
|
||||
|
||||
Reference in New Issue
Block a user