From ab3c188e91cb86c6d74f2bd6a42f39d08fea8c65 Mon Sep 17 00:00:00 2001 From: Sudo-Ivan Date: Sat, 27 Dec 2025 03:15:42 -0600 Subject: [PATCH] Add RSS feed generation and improve security features - Implemented structured RSS feed generation using XML encoding. - Enhanced URL registration by incorporating a random salt for hash generation. - Introduced a bot blocker to the security middleware for improved bot detection. - Updated security middleware to utilize the new bot blocker and added more entropy to request fingerprinting. --- internal/api/handlers.go | 114 +++++++++++++++++----- internal/security/bot_blocker.go | 151 +++++++++++++++++++++++++++++ internal/security/security.go | 34 ++++--- internal/security/security_test.go | 3 +- 4 files changed, 266 insertions(+), 36 deletions(-) create mode 100644 internal/security/bot_blocker.go diff --git a/internal/api/handlers.go b/internal/api/handlers.go index 754830e..063ccab 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -1,8 +1,11 @@ package api import ( + "crypto/rand" "crypto/sha256" + "encoding/hex" "encoding/json" + "encoding/xml" "fmt" "io" "log" @@ -56,15 +59,22 @@ type Server struct { rssCache atomic.Value // stores string rssLastMod atomic.Value // stores time.Time avatarCache string + salt []byte } func NewServer(token string, initialSoftware []models.Software, statsService *stats.Service) *Server { + salt := make([]byte, 32) + if _, err := rand.Read(salt); err != nil { + log.Fatalf("Failed to generate random salt: %v", err) + } + s := &Server{ GiteaToken: token, SoftwareList: &SoftwareCache{data: initialSoftware}, Stats: statsService, urlMap: make(map[string]string), avatarCache: ".cache/avatars", + salt: salt, } s.rssCache.Store("") s.rssLastMod.Store(time.Time{}) @@ -77,7 +87,11 @@ func NewServer(token string, initialSoftware []models.Software, statsService *st } func (s *Server) RegisterURL(targetURL string) string { - hash := fmt.Sprintf("%x", sha256.Sum256([]byte(targetURL))) + h := sha256.New() + h.Write(s.salt) + h.Write([]byte(targetURL)) + hash := hex.EncodeToString(h.Sum(nil)) + s.urlMapMu.Lock() s.urlMap[hash] = targetURL s.urlMapMu.Unlock() @@ -344,6 +358,46 @@ func (s *Server) AvatarHandler(w http.ResponseWriter, r *http.Request) { _, _ = w.Write(data) } +type rssFeed struct { + XMLName xml.Name `xml:"rss"` + Version string `xml:"version,attr"` + Atom string `xml:"xmlns:atom,attr"` + Channel rssChannel `xml:"channel"` +} + +type rssChannel struct { + Title string `xml:"title"` + Link string `xml:"link"` + Description string `xml:"description"` + Language string `xml:"language"` + LastBuildDate string `xml:"lastBuildDate"` + AtomLink rssLink `xml:"atom:link"` + Items []rssItem `xml:"item"` +} + +type rssLink struct { + Href string `xml:"href,attr"` + Rel string `xml:"rel,attr"` + Type string `xml:"type,attr"` +} + +type rssItem struct { + Title string `xml:"title"` + Link string `xml:"link"` + Description rssDescription `xml:"description"` + GUID rssGUID `xml:"guid"` + PubDate string `xml:"pubDate"` +} + +type rssDescription struct { + Content string `xml:",cdata"` +} + +type rssGUID struct { + Content string `xml:",chardata"` + IsPermaLink bool `xml:"isPermaLink,attr"` +} + func (s *Server) RSSHandler(w http.ResponseWriter, r *http.Request) { softwareList := s.SoftwareList.Get() targetSoftware := r.URL.Query().Get("software") @@ -383,17 +437,22 @@ func (s *Server) RSSHandler(w http.ResponseWriter, r *http.Request) { selfLink = fmt.Sprintf("%s/api/rss?software=%s", baseURL, targetSoftware) } - var b strings.Builder - b.WriteString(` - - - ` + feedTitle + ` - ` + baseURL + ` - ` + feedDescription + ` - en-us - ` + time.Now().Format(time.RFC1123Z) + ` - -`) + feed := rssFeed{ + Version: "2.0", + Atom: "http://www.w3.org/2005/Atom", + Channel: rssChannel{ + Title: feedTitle, + Link: baseURL, + Description: feedDescription, + Language: "en-us", + LastBuildDate: time.Now().Format(time.RFC1123Z), + AtomLink: rssLink{ + Href: selfLink, + Rel: "self", + Type: "application/rss+xml", + }, + }, + } for i, it := range items { if i >= 50 { @@ -406,20 +465,27 @@ func (s *Server) RSSHandler(w http.ResponseWriter, r *http.Request) { description = it.Release.Body } - fmt.Fprintf(&b, ` - %s - %s - - %s-%s - %s - -`, title, link, description, it.Software.Name, it.Release.TagName, it.Release.CreatedAt.Format(time.RFC1123Z)) + feed.Channel.Items = append(feed.Channel.Items, rssItem{ + Title: title, + Link: link, + Description: rssDescription{ + Content: description, + }, + GUID: rssGUID{ + Content: fmt.Sprintf("%s-%s", it.Software.Name, it.Release.TagName), + IsPermaLink: false, + }, + PubDate: it.Release.CreatedAt.Format(time.RFC1123Z), + }) } - b.WriteString(` -`) - w.Header().Set("Content-Type", "application/rss+xml; charset=utf-8") w.Header().Set("Cache-Control", "public, max-age=300") - _, _ = w.Write([]byte(b.String())) + + fmt.Fprint(w, xml.Header) + enc := xml.NewEncoder(w) + enc.Indent("", " ") + if err := enc.Encode(feed); err != nil { + log.Printf("Error encoding RSS feed: %v", err) + } } diff --git a/internal/security/bot_blocker.go b/internal/security/bot_blocker.go new file mode 100644 index 0000000..94afd72 --- /dev/null +++ b/internal/security/bot_blocker.go @@ -0,0 +1,151 @@ +package security + +import ( + "bufio" + "log" + "net/http" + "os" + "path/filepath" + "strings" + "sync" + "time" +) + +type BotBlocker struct { + mu sync.RWMutex + badUAs map[string]struct{} + blocklistURLs []string + cacheFile string +} + +func NewBotBlocker(blocklistPath string) *BotBlocker { + bb := &BotBlocker{ + badUAs: make(map[string]struct{}), + cacheFile: ".cache/bad-user-agents.txt", + } + + if blocklistPath != "" { + // #nosec G304 + if file, err := os.Open(filepath.Clean(blocklistPath)); err == nil { + scanner := bufio.NewScanner(file) + for scanner.Scan() { + url := strings.TrimSpace(scanner.Text()) + if url != "" && !strings.HasPrefix(url, "#") { + bb.blocklistURLs = append(bb.blocklistURLs, url) + } + } + _ = file.Close() + } + } + + // Load existing cache if available + bb.loadFromCache() + + // If we have URLs, start background updater + if len(bb.blocklistURLs) > 0 { + go bb.startUpdater() + } + + return bb +} + +func (bb *BotBlocker) loadFromCache() { + if _, err := os.Stat(bb.cacheFile); err == nil { + if file, err := os.Open(bb.cacheFile); err == nil { + bb.mu.Lock() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + ua := strings.TrimSpace(scanner.Text()) + if ua != "" { + bb.badUAs[strings.ToLower(ua)] = struct{}{} + } + } + bb.mu.Unlock() + _ = file.Close() + } + } +} + +func (bb *BotBlocker) startUpdater() { + // Immediate fetch on start + bb.fetchAndRefresh() + + ticker := time.NewTicker(24 * time.Hour) + for range ticker.C { + bb.fetchAndRefresh() + } +} + +func (bb *BotBlocker) fetchAndRefresh() { + newUAs := make(map[string]struct{}) + client := &http.Client{Timeout: 30 * time.Second} + + for _, url := range bb.blocklistURLs { + resp, err := client.Get(url) + if err != nil { + log.Printf("Error fetching bot blocklist from %s: %v", url, err) + continue + } + + scanner := bufio.NewScanner(resp.Body) + for scanner.Scan() { + ua := strings.TrimSpace(scanner.Text()) + if ua != "" && !strings.HasPrefix(ua, "#") { + newUAs[strings.ToLower(ua)] = struct{}{} + } + } + _ = resp.Body.Close() + } + + if len(newUAs) > 0 { + bb.mu.Lock() + bb.badUAs = newUAs + bb.mu.Unlock() + + // Save to cache + _ = os.MkdirAll(".cache", 0750) + if file, err := os.Create(bb.cacheFile); err == nil { + writer := bufio.NewWriter(file) + for ua := range newUAs { + _, _ = writer.WriteString(ua + "\n") + } + _ = writer.Flush() + _ = file.Close() + } + log.Printf("Bot blocklist updated with %d entries", len(newUAs)) + } +} + +func (bb *BotBlocker) IsBot(ua string) bool { + if ua == "" { + return false + } + uaLower := strings.ToLower(ua) + + // Check static list first (fast) + for _, bot := range BotUserAgents { + if strings.Contains(uaLower, bot) { + return true + } + } + + // Check dynamic list + bb.mu.RLock() + defer bb.mu.RUnlock() + + // Some lists contain partial strings, some contain exact matches. + // We'll do a partial match check for each entry in our dynamic list. + // This might be slow if the list is huge. + // Optimization: check exact match first, then partial if needed. + if _, ok := bb.badUAs[uaLower]; ok { + return true + } + + for badUA := range bb.badUAs { + if strings.Contains(uaLower, badUA) { + return true + } + } + + return false +} diff --git a/internal/security/security.go b/internal/security/security.go index fc2b065..c82d802 100644 --- a/internal/security/security.go +++ b/internal/security/security.go @@ -90,9 +90,23 @@ func GetRequestFingerprint(r *http.Request, s *stats.Service) string { ipStr = ip.String() } + // Improve fingerprinting with more entropy ua := r.Header.Get("User-Agent") + lang := r.Header.Get("Accept-Language") + enc := r.Header.Get("Accept-Encoding") + chUA := r.Header.Get("Sec-CH-UA") + hash := sha256.New() - hash.Write([]byte(ipStr + ua)) + hash.Write([]byte(ipStr)) + hash.Write([]byte("|")) + hash.Write([]byte(ua)) + hash.Write([]byte("|")) + hash.Write([]byte(lang)) + hash.Write([]byte("|")) + hash.Write([]byte(enc)) + hash.Write([]byte("|")) + hash.Write([]byte(chUA)) + fingerprint := hex.EncodeToString(hash.Sum(nil)) s.KnownHashes.Lock() @@ -168,12 +182,12 @@ func GetSafeHTTPClient(timeout time.Duration) *http.Client { } } -func SecurityMiddleware(s *stats.Service) func(http.Handler) http.Handler { +func SecurityMiddleware(s *stats.Service, bb *BotBlocker) func(http.Handler) http.Handler { return func(next http.Handler) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { start := time.Now() path := strings.ToLower(r.URL.Path) - ua := strings.ToLower(r.UserAgent()) + ua := r.UserAgent() fingerprint := GetRequestFingerprint(r, s) ctx := context.WithValue(r.Context(), FingerprintKey, fingerprint) @@ -193,14 +207,12 @@ func SecurityMiddleware(s *stats.Service) func(http.Handler) http.Handler { s.GlobalStats.Unlock() }() - for _, bot := range BotUserAgents { - if strings.Contains(ua, bot) { - s.GlobalStats.Lock() - s.GlobalStats.BlockedRequests[fingerprint] = true - s.GlobalStats.Unlock() - http.Error(w, "Bots are not allowed", http.StatusForbidden) - return - } + if bb != nil && bb.IsBot(ua) { + s.GlobalStats.Lock() + s.GlobalStats.BlockedRequests[fingerprint] = true + s.GlobalStats.Unlock() + http.Error(w, "Bots are not allowed", http.StatusForbidden) + return } for _, pattern := range ForbiddenPatterns { diff --git a/internal/security/security_test.go b/internal/security/security_test.go index b3d94b9..69af34c 100644 --- a/internal/security/security_test.go +++ b/internal/security/security_test.go @@ -94,7 +94,8 @@ func TestGetRequestFingerprint(t *testing.T) { func TestSecurityMiddleware(t *testing.T) { statsService := stats.NewService("test-hashes.json") - handler := SecurityMiddleware(statsService)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + botBlocker := NewBotBlocker("") + handler := SecurityMiddleware(statsService, botBlocker)(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) }))