Files
webnews/internal/api/handlers.go

637 lines
15 KiB
Go

package api
import (
	"crypto/rand"
	"crypto/sha256"
	"crypto/subtle"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net"
	"net/http"
	"net/url"
	"os"
	"strings"
	"sync"
	"time"

	"git.quad4.io/Go-Libs/RSS"
	"git.quad4.io/Quad4-Software/webnews/internal/storage"
	readability "github.com/go-shiori/go-readability"
	"golang.org/x/sync/singleflight"
	"golang.org/x/time/rate"
)
// ProxyResponse is the JSON document returned by HandleFeedProxy:
// feed-level metadata plus the parsed article list.
type ProxyResponse struct {
	Feed     FeedInfo  `json:"feed"`
	Articles []Article `json:"articles"`
}
// FeedInfo carries feed-level metadata in a ProxyResponse.
type FeedInfo struct {
	Title       string `json:"title"`
	SiteURL     string `json:"siteUrl"`
	Description string `json:"description"`
	LastFetched int64  `json:"lastFetched"` // Unix milliseconds when the feed was fetched
}
// Article is a single feed item in the shape the web client expects.
// Read/Saved are always false when produced by HandleFeedProxy; the
// client presumably maintains that state itself — confirm against the
// frontend.
type Article struct {
	ID          string `json:"id"`     // item GUID, or its link when the GUID is empty
	FeedID      string `json:"feedId"` // the feed URL this item came from
	Title       string `json:"title"`
	Link        string `json:"link"`
	Description string `json:"description"`
	Author      string `json:"author"`
	PubDate     int64  `json:"pubDate"` // Unix milliseconds; "now" when the feed omits a date
	Read        bool   `json:"read"`
	Saved       bool   `json:"saved"`
	ImageURL    string `json:"imageUrl"` // first image-typed enclosure, if any
}
// cacheEntry is one in-memory cache value together with its expiry deadline.
type cacheEntry struct {
	data      any
	expiresAt time.Time
}
// Cache is a TTL cache with two backends. When Storage is non-nil every
// read and write goes through the SQLite-backed cache (which is then
// responsible for expiry); otherwise entries live in the in-process
// sync.Map and expiry is checked lazily on Get. Enabled=false disables
// the cache entirely.
type Cache struct {
	entries sync.Map          // in-memory backend: key -> cacheEntry
	TTL     time.Duration     // lifetime applied on every Set
	Enabled bool              // when false, Get always misses and Set is a no-op
	Storage *storage.SQLiteDB // optional persistent backend; takes precedence when set
}
// Get looks up key and reports whether a live value was found. With a
// persistent backend configured, the value is decoded from its stored
// JSON form (so callers see generic map/slice types rather than the
// originally stored struct); otherwise the in-memory entry is returned
// as stored, with expired entries dropped lazily on read.
func (c *Cache) Get(key string) (any, bool) {
	if !c.Enabled {
		return nil, false
	}
	// Persistent backend takes precedence; it owns TTL enforcement.
	if c.Storage != nil {
		raw, err := c.Storage.GetCache(key)
		if err != nil || raw == nil {
			return nil, false
		}
		var decoded any
		if json.Unmarshal(raw, &decoded) != nil {
			return nil, false
		}
		return decoded, true
	}
	stored, ok := c.entries.Load(key)
	if !ok {
		return nil, false
	}
	entry := stored.(cacheEntry)
	if time.Now().After(entry.expiresAt) {
		// Stale: evict and report a miss.
		c.entries.Delete(key)
		return nil, false
	}
	return entry.data, true
}
// Set stores data under key for the cache's TTL. With a persistent
// backend the value is JSON-encoded first (values that fail to marshal
// are silently not cached); otherwise it is kept in memory with an
// absolute expiry deadline.
func (c *Cache) Set(key string, data any) {
	if !c.Enabled {
		return
	}
	if c.Storage == nil {
		c.entries.Store(key, cacheEntry{
			data:      data,
			expiresAt: time.Now().Add(c.TTL),
		})
		return
	}
	encoded, err := json.Marshal(data)
	if err != nil {
		return // unencodable value: skip caching rather than fail the request
	}
	_ = c.Storage.SetCache(key, encoded, c.TTL)
}
// FeedCache holds parsed feed responses; FullTextCache holds extracted
// article bodies. Both start disabled — presumably server setup enables
// them and wires in storage (not visible in this file).
var FeedCache = &Cache{TTL: 10 * time.Minute, Enabled: false}
var FullTextCache = &Cache{TTL: 1 * time.Hour, Enabled: false}

// RequestGroup collapses concurrent identical upstream fetches into one.
var RequestGroup = &singleflight.Group{}
// RateLimiter hands out one token-bucket limiter per client ID and can
// optionally persist the set of known client hashes to File so the
// same clients are pre-registered after a restart.
//
// NOTE(review): mu is a *pointer* to a mutex; a value field with
// pointer receivers would be the more idiomatic Go (the struct is
// already only used via *RateLimiter).
type RateLimiter struct {
	clients map[string]*rate.Limiter // client ID -> its token bucket
	mu      *sync.RWMutex            // guards clients
	r       rate.Limit               // refill rate applied to newly created limiters
	b       int                      // burst size applied to newly created limiters
	File    string                   // optional persistence path ("" = in-memory only)
}
// NewRateLimiter builds a RateLimiter that issues buckets with rate r
// and burst b. When file is non-empty, previously persisted client
// hashes are loaded so their limiters exist immediately.
func NewRateLimiter(r rate.Limit, b int, file string) *RateLimiter {
	limiter := &RateLimiter{
		clients: map[string]*rate.Limiter{},
		mu:      new(sync.RWMutex),
		r:       r,
		b:       b,
		File:    file,
	}
	if file != "" {
		limiter.LoadHashes()
	}
	return limiter
}
// LoadHashes reads the JSON array of client hashes from rl.File and
// creates a fresh limiter for each one. A missing or malformed file is
// silently ignored (best-effort restore).
func (rl *RateLimiter) LoadHashes() {
	rl.mu.Lock()
	defer rl.mu.Unlock()
	raw, err := os.ReadFile(rl.File)
	if err != nil {
		return
	}
	var stored []string
	if json.Unmarshal(raw, &stored) != nil {
		return
	}
	for _, hash := range stored {
		rl.clients[hash] = rate.NewLimiter(rl.r, rl.b)
	}
}
// SaveHashes snapshots the known client hashes under a read lock and
// writes them to rl.File as indented JSON (mode 0600). Errors are
// logged, not returned; concurrent saves may interleave, in which case
// the last writer wins.
func (rl *RateLimiter) SaveHashes() {
	rl.mu.RLock()
	var hashes []string // stays nil (-> JSON "null") when no clients exist
	for hash := range rl.clients {
		hashes = append(hashes, hash)
	}
	rl.mu.RUnlock()

	payload, err := json.MarshalIndent(hashes, "", " ")
	if err != nil {
		log.Printf("Error marshaling rate limit hashes: %v", err)
		return
	}
	if err := os.WriteFile(rl.File, payload, 0600); err != nil {
		log.Printf("Error writing rate limit hashes to %s: %v", rl.File, err)
	}
}
// GetLimiter returns the rate limiter for client id, creating and
// registering a new one on first sight (and persisting the client set
// when a File is configured).
//
// The original implementation had a check-then-act race: between the
// read-locked lookup and the write-locked insert, another goroutine
// could create a limiter for the same id, and each caller would then
// hold a different bucket (splitting the client's rate state). The
// creation path now re-checks the map under the write lock.
func (rl *RateLimiter) GetLimiter(id string) *rate.Limiter {
	rl.mu.RLock()
	limiter, exists := rl.clients[id]
	rl.mu.RUnlock()
	if exists {
		return limiter
	}

	rl.mu.Lock()
	// Double-check: another goroutine may have inserted it meanwhile.
	limiter, exists = rl.clients[id]
	if !exists {
		limiter = rate.NewLimiter(rl.r, rl.b)
		rl.clients[id] = limiter
	}
	rl.mu.Unlock()

	// Persist only when this call actually added a new client.
	if !exists && rl.File != "" {
		rl.SaveHashes()
	}
	return limiter
}
// SetLimit changes the rate and burst used for limiters and discards
// all existing per-client buckets so every client picks up the new
// parameters on its next request.
func (rl *RateLimiter) SetLimit(r rate.Limit, b int) {
	rl.mu.Lock()
	defer rl.mu.Unlock()
	rl.r, rl.b = r, b
	// Drop stale buckets built with the old rate.
	rl.clients = map[string]*rate.Limiter{}
}
// Limiter is the process-wide limiter used by LimitMiddleware:
// 50 requests/second with a burst of 100, no persistence.
var Limiter = NewRateLimiter(rate.Limit(50), 100, "")

// ForbiddenPatterns are path substrings typical of vulnerability
// scanners and bots; BotBlockerMiddleware rejects any request whose
// lowercased path contains one of them.
var ForbiddenPatterns = []string{
	".git", ".env", ".aws", ".config", ".ssh",
	"wp-admin", "wp-login", "phpinfo", ".php",
	"etc/passwd", "cgi-bin",
}
func GetRealIP(r *http.Request) string {
ip, _, err := net.SplitHostPort(r.RemoteAddr)
if err != nil {
ip = r.RemoteAddr
}
if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
if comma := strings.IndexByte(xff, ','); comma != -1 {
return strings.TrimSpace(xff[:comma])
}
return strings.TrimSpace(xff)
}
if xri := r.Header.Get("X-Real-IP"); xri != "" {
return strings.TrimSpace(xri)
}
return ip
}
// BotBlockerMiddleware rejects requests whose lowercased path contains
// any ForbiddenPatterns entry (typical scanner probes) with 403 and a
// log line including the client IP; everything else passes through.
func BotBlockerMiddleware(next http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		lowered := strings.ToLower(r.URL.Path)
		for _, probe := range ForbiddenPatterns {
			if !strings.Contains(lowered, probe) {
				continue
			}
			log.Printf("Blocked suspicious request: %s from %s", r.URL.String(), GetRealIP(r))
			http.Error(w, "Forbidden", http.StatusForbidden)
			return
		}
		next.ServeHTTP(w, r)
	}
}
// AuthManager implements the three authentication modes used by
// AuthMiddleware: "none" (everything allowed), "token" (one shared
// MasterToken) and "multi" (a persisted set of account tokens,
// optionally self-service via Register).
type AuthManager struct {
	Mode              string          // "none", "token", "multi"
	MasterToken       string          // the only accepted token in "token" mode
	AllowRegistration bool            // "multi" mode: whether Register may mint tokens
	AuthFile          string          // "multi" mode: JSON file persisting Tokens
	Tokens            map[string]bool // valid tokens in "multi" mode
	mu                sync.RWMutex    // guards Tokens
}
// NewAuthManager constructs an AuthManager for the given mode. In
// "multi" mode with a file configured, previously persisted tokens
// are loaded immediately.
func NewAuthManager(mode, token, file string, allowReg bool) *AuthManager {
	manager := &AuthManager{
		Mode:              mode,
		MasterToken:       token,
		AllowRegistration: allowReg,
		AuthFile:          file,
		Tokens:            map[string]bool{},
	}
	if mode == "multi" && file != "" {
		manager.LoadTokens()
	}
	return manager
}
// LoadTokens merges the JSON token list from AuthFile into Tokens.
// A missing file is normal (first run) and silent; other read or parse
// errors are logged and leave the current set untouched.
func (am *AuthManager) LoadTokens() {
	am.mu.Lock()
	defer am.mu.Unlock()
	raw, err := os.ReadFile(am.AuthFile)
	switch {
	case os.IsNotExist(err):
		return
	case err != nil:
		log.Printf("Error reading auth file: %v", err)
		return
	}
	var saved []string
	if err := json.Unmarshal(raw, &saved); err != nil {
		log.Printf("Error parsing auth file: %v", err)
		return
	}
	for _, token := range saved {
		am.Tokens[token] = true
	}
}
// SaveTokens snapshots the current token set under a read lock and
// writes it to AuthFile as indented JSON (mode 0600). Errors are
// logged rather than returned.
func (am *AuthManager) SaveTokens() {
	am.mu.RLock()
	var snapshot []string // nil (-> JSON "null") when no tokens exist
	for token := range am.Tokens {
		snapshot = append(snapshot, token)
	}
	am.mu.RUnlock()

	payload, err := json.MarshalIndent(snapshot, "", " ")
	if err != nil {
		log.Printf("Error marshaling tokens: %v", err)
		return
	}
	if err := os.WriteFile(am.AuthFile, payload, 0600); err != nil {
		log.Printf("Error writing auth file: %v", err)
	}
}
// Validate reports whether token is acceptable under the current mode:
// "none" accepts everything, "token" requires a match with the master
// token, "multi" requires membership in the registered set; unknown
// modes reject everything.
//
// The master-token comparison uses subtle.ConstantTimeCompare instead
// of ==, so response timing does not leak how many leading bytes of a
// guessed token were correct (the original string comparison
// short-circuits at the first mismatch).
func (am *AuthManager) Validate(token string) bool {
	switch am.Mode {
	case "none":
		return true
	case "token":
		return subtle.ConstantTimeCompare([]byte(token), []byte(am.MasterToken)) == 1
	case "multi":
		am.mu.RLock()
		defer am.mu.RUnlock()
		return am.Tokens[token]
	default:
		return false
	}
}
// Register mints a new account token in "multi" mode (when enabled),
// records it, persists the token set, and returns it formatted as four
// hex quads ("xxxx-xxxx-xxxx-xxxx", from 8 random bytes). Returns
// http.ErrNotSupported when registration is not available in the
// current configuration.
func (am *AuthManager) Register() (string, error) {
	if am.Mode != "multi" || !am.AllowRegistration {
		return "", http.ErrNotSupported
	}

	var raw [8]byte
	if _, err := io.ReadFull(rand.Reader, raw[:]); err != nil {
		return "", err
	}
	hexed := hex.EncodeToString(raw[:]) // 16 hex chars
	formatted := strings.Join([]string{hexed[0:4], hexed[4:8], hexed[8:12], hexed[12:16]}, "-")

	am.mu.Lock()
	am.Tokens[formatted] = true
	am.mu.Unlock()
	am.SaveTokens()
	return formatted, nil
}
// AuthMiddleware enforces token authentication unless the manager's
// mode is "none". The token is read from the X-Account-Number header,
// falling back to a Bearer Authorization header; anything the manager
// rejects gets 401 with a WWW-Authenticate challenge.
func AuthMiddleware(am *AuthManager, next http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		if am.Mode == "none" {
			next.ServeHTTP(w, r)
			return
		}

		token := r.Header.Get("X-Account-Number")
		if token == "" {
			const bearerPrefix = "Bearer "
			if auth := r.Header.Get("Authorization"); strings.HasPrefix(auth, bearerPrefix) {
				token = auth[len(bearerPrefix):]
			}
		}

		if !am.Validate(token) {
			w.Header().Set("WWW-Authenticate", `Bearer realm="Web News"`)
			http.Error(w, "Unauthorized: Invalid Account Number", http.StatusUnauthorized)
			return
		}
		next.ServeHTTP(w, r)
	}
}
// LimitMiddleware rate-limits each client, identified by the SHA-256
// of client IP concatenated with User-Agent, answering 429 once the
// client's token bucket is exhausted.
func LimitMiddleware(next http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		fingerprint := GetRealIP(r) + r.Header.Get("User-Agent")
		digest := sha256.Sum256([]byte(fingerprint))
		clientID := hex.EncodeToString(digest[:])
		if !Limiter.GetLimiter(clientID).Allow() {
			http.Error(w, http.StatusText(http.StatusTooManyRequests), http.StatusTooManyRequests)
			return
		}
		next.ServeHTTP(w, r)
	}
}
// HandleFeedProxy fetches the RSS feed named by the ?url= query
// parameter, parses it, and responds with a ProxyResponse JSON
// document (feed metadata plus articles). Successful results are
// cached in FeedCache, and concurrent requests for the same feed URL
// are collapsed into one upstream fetch via RequestGroup.
//
// NOTE(review): the feed URL is fetched with no allow-list or
// private-address filtering, so this endpoint is usable for SSRF —
// confirm it is only reachable behind the auth/rate-limit middleware.
func HandleFeedProxy(w http.ResponseWriter, r *http.Request) {
	// CORS preflight: acknowledge and stop.
	if r.Method == "OPTIONS" {
		w.WriteHeader(http.StatusOK)
		return
	}
	feedURL := r.URL.Query().Get("url")
	if feedURL == "" {
		http.Error(w, "Missing url parameter", http.StatusBadRequest)
		return
	}
	// Cache hit: re-encode the stored response as-is.
	if data, ok := FeedCache.Get(feedURL); ok {
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(data); err != nil {
			log.Printf("Error encoding cached feed proxy response: %v", err)
		}
		return
	}
	// singleflight: one goroutine performs the fetch per URL; waiters
	// receive the same result (or error).
	val, err, _ := RequestGroup.Do(feedURL, func() (any, error) {
		client := &http.Client{Timeout: 15 * time.Second}
		req, err := http.NewRequest("GET", feedURL, nil)
		if err != nil {
			return nil, fmt.Errorf("failed to create request: %w", err)
		}
		// Add browser-like headers to avoid being blocked by Cloudflare/Bot protection
		req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
		req.Header.Set("Accept", "application/rss+xml, application/xml;q=0.9, text/xml;q=0.8, */*;q=0.7")
		req.Header.Set("Cache-Control", "no-cache")
		req.Header.Set("Pragma", "no-cache")
		resp, err := client.Do(req)
		if err != nil {
			return nil, fmt.Errorf("failed to fetch feed: %w", err)
		}
		defer resp.Body.Close()
		if resp.StatusCode != http.StatusOK {
			return nil, fmt.Errorf("feed returned status %s", resp.Status)
		}
		data, err := io.ReadAll(resp.Body)
		if err != nil {
			return nil, fmt.Errorf("failed to read feed body: %w", err)
		}
		parsedFeed, err := rss.Parse(data)
		if err != nil {
			return nil, fmt.Errorf("failed to parse feed: %w", err)
		}
		// Map feed items to the client-facing Article shape.
		articles := make([]Article, 0, len(parsedFeed.Items))
		for _, item := range parsedFeed.Items {
			// Prefer the GUID as the stable ID, falling back to the link.
			id := item.GUID
			if id == "" {
				id = item.Link
			}
			// Default to "now" when the feed provides no publish date
			// (item.Published is presumably a *time.Time — nil when absent).
			pubDate := time.Now().UnixMilli()
			if item.Published != nil {
				pubDate = item.Published.UnixMilli()
			}
			author := ""
			if item.Author != nil {
				author = item.Author.Name
			}
			// First image-typed enclosure becomes the article image.
			imageURL := ""
			for _, enc := range item.Enclosures {
				if enc.Type == "image/jpeg" || enc.Type == "image/png" || enc.Type == "image/gif" {
					imageURL = enc.URL
					break
				}
			}
			articles = append(articles, Article{
				ID:          id,
				FeedID:      feedURL,
				Title:       item.Title,
				Link:        item.Link,
				Description: item.Description,
				Author:      author,
				PubDate:     pubDate,
				Read:        false,
				Saved:       false,
				ImageURL:    imageURL,
			})
		}
		response := ProxyResponse{
			Feed: FeedInfo{
				Title:       parsedFeed.Title,
				SiteURL:     parsedFeed.Link,
				Description: parsedFeed.Description,
				LastFetched: time.Now().UnixMilli(),
			},
			Articles: articles,
		}
		FeedCache.Set(feedURL, response)
		return response, nil
	})
	if err != nil {
		// NOTE(review): classifying errors by substring match on "status"
		// is fragile — any wrapped message containing "status" becomes a
		// 502. Consider a sentinel error + errors.Is instead.
		if strings.Contains(err.Error(), "status") {
			http.Error(w, err.Error(), http.StatusBadGateway)
		} else {
			http.Error(w, err.Error(), http.StatusInternalServerError)
		}
		return
	}
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(val); err != nil {
		log.Printf("Error encoding feed proxy response: %v", err)
	}
}
func HandleProxy(w http.ResponseWriter, r *http.Request) {
if r.Method == "OPTIONS" {
w.WriteHeader(http.StatusOK)
return
}
targetURL := r.URL.Query().Get("url")
if targetURL == "" {
http.Error(w, "Missing url parameter", http.StatusBadRequest)
return
}
client := &http.Client{Timeout: 15 * time.Second}
req, err := http.NewRequest("GET", targetURL, nil)
if err != nil {
http.Error(w, "Failed to create request: "+err.Error(), http.StatusInternalServerError)
return
}
// Add browser-like headers
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
resp, err := client.Do(req)
if err != nil {
http.Error(w, "Failed to fetch URL: "+err.Error(), http.StatusInternalServerError)
return
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
http.Error(w, "Target returned status "+resp.Status, http.StatusBadGateway)
return
}
w.Header().Set("Content-Type", resp.Header.Get("Content-Type"))
if _, err := io.Copy(w, resp.Body); err != nil {
log.Printf("Error copying proxy response body: %v", err)
}
}
// FullTextResponse is the JSON payload of HandleFullText. Its fields
// mirror go-readability's extracted Article, plus the URL the client
// asked to extract.
type FullTextResponse struct {
	Title       string `json:"title"`
	Content     string `json:"content"`
	TextContent string `json:"textContent"`
	Excerpt     string `json:"excerpt"`
	Byline      string `json:"byline"`
	SiteName    string `json:"siteName"`
	Image       string `json:"image"`
	Favicon     string `json:"favicon"`
	URL         string `json:"url"` // the originally requested URL, echoed back
}
// HandleFullText extracts a readable article from the page named by
// the ?url= query parameter using go-readability and responds with a
// FullTextResponse JSON document. Results are cached in FullTextCache
// and concurrent extractions of the same URL are deduplicated via
// RequestGroup (key prefixed "ft-" to avoid clashing with feed keys).
func HandleFullText(w http.ResponseWriter, r *http.Request) {
	// CORS preflight.
	if r.Method == "OPTIONS" {
		w.WriteHeader(http.StatusOK)
		return
	}
	targetURL := r.URL.Query().Get("url")
	if targetURL == "" {
		http.Error(w, "Missing url parameter", http.StatusBadRequest)
		return
	}
	// Serve from cache when possible.
	if data, ok := FullTextCache.Get(targetURL); ok {
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(data); err != nil {
			log.Printf("Error encoding cached fulltext response: %v", err)
		}
		return
	}
	val, err, _ := RequestGroup.Do("ft-"+targetURL, func() (any, error) {
		// Parse error deliberately ignored; parsedURL is only used as the
		// base URL for the fallback extraction below. NOTE(review):
		// confirm go-readability tolerates a nil base URL if parsing fails.
		parsedURL, _ := url.Parse(targetURL)
		// First attempt: let readability fetch the page itself.
		article, err := readability.FromURL(targetURL, 15*time.Second)
		if err != nil {
			// Fallback: fetch manually with browser-like headers (some
			// sites block default library user agents), then extract from
			// the body. The inner err shadows the outer one; after this
			// block, article is set and all inner errors were returned.
			client := &http.Client{Timeout: 15 * time.Second}
			req, err := http.NewRequest("GET", targetURL, nil)
			if err != nil {
				return nil, fmt.Errorf("failed to create request: %w", err)
			}
			req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
			req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
			resp, err := client.Do(req)
			if err != nil {
				return nil, fmt.Errorf("failed to fetch content: %w", err)
			}
			defer resp.Body.Close()
			// NOTE(review): resp.StatusCode is not checked here, so a
			// 404/500 error page may be extracted as if it were the article.
			article, err = readability.FromReader(resp.Body, parsedURL)
			if err != nil {
				return nil, fmt.Errorf("failed to extract content: %w", err)
			}
		}
		response := FullTextResponse{
			Title:       article.Title,
			Content:     article.Content,
			TextContent: article.TextContent,
			Excerpt:     article.Excerpt,
			Byline:      article.Byline,
			SiteName:    article.SiteName,
			Image:       article.Image,
			Favicon:     article.Favicon,
			URL:         targetURL,
		}
		FullTextCache.Set(targetURL, response)
		return response, nil
	})
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	if err := json.NewEncoder(w).Encode(val); err != nil {
		log.Printf("Error encoding fulltext response: %v", err)
	}
}