fix
This commit is contained in:
125
internal/ingest/alert_outbox.go
Normal file
125
internal/ingest/alert_outbox.go
Normal file
@@ -0,0 +1,125 @@
|
||||
package ingest
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.apinb.com/ops/logs/internal/impl"
|
||||
"git.apinb.com/ops/logs/internal/models"
|
||||
)
|
||||
|
||||
const (
|
||||
outboxStatusPending = "pending"
|
||||
outboxStatusRetrying = "retrying"
|
||||
outboxStatusSent = "sent"
|
||||
outboxStatusDead = "dead"
|
||||
)
|
||||
|
||||
func enqueueAlert(logEventID uint, body AlertReceiveBody) error {
|
||||
payload, err := json.Marshal(body)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
row := models.AlertOutbox{
|
||||
LogEventID: logEventID,
|
||||
PayloadJSON: string(payload),
|
||||
Status: outboxStatusPending,
|
||||
RetryCount: 0,
|
||||
NextRetryAt: time.Now(),
|
||||
LastError: "",
|
||||
}
|
||||
return impl.DBService.Create(&row).Error
|
||||
}
|
||||
|
||||
func StartAlertDispatcher() {
|
||||
go func() {
|
||||
ticker := time.NewTicker(2 * time.Second)
|
||||
defer ticker.Stop()
|
||||
for range ticker.C {
|
||||
processAlertOutboxBatch(20)
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
func processAlertOutboxBatch(limit int) {
|
||||
if limit <= 0 {
|
||||
limit = 20
|
||||
}
|
||||
var rows []models.AlertOutbox
|
||||
now := time.Now()
|
||||
err := impl.DBService.
|
||||
Where("status IN ? AND next_retry_at <= ?", []string{outboxStatusPending, outboxStatusRetrying}, now).
|
||||
Order("id asc").
|
||||
Limit(limit).
|
||||
Find(&rows).Error
|
||||
if err != nil || len(rows) == 0 {
|
||||
return
|
||||
}
|
||||
for _, row := range rows {
|
||||
processOneOutbox(row)
|
||||
}
|
||||
}
|
||||
|
||||
func processOneOutbox(row models.AlertOutbox) {
|
||||
var body AlertReceiveBody
|
||||
if err := json.Unmarshal([]byte(row.PayloadJSON), &body); err != nil {
|
||||
markOutboxDead(row.ID, row.RetryCount, "invalid_payload: "+err.Error())
|
||||
return
|
||||
}
|
||||
if err := forwardAlert(body); err != nil {
|
||||
markOutboxRetry(row, err.Error())
|
||||
return
|
||||
}
|
||||
_ = impl.DBService.Model(&models.AlertOutbox{}).Where("id = ?", row.ID).Updates(map[string]interface{}{
|
||||
"status": outboxStatusSent,
|
||||
"last_error": "",
|
||||
"next_retry_at": time.Now(),
|
||||
}).Error
|
||||
_ = impl.DBService.Model(&models.LogEvent{}).Where("id = ?", row.LogEventID).Updates(map[string]interface{}{
|
||||
"alert_sent": true,
|
||||
"dispatch_status": "sent",
|
||||
}).Error
|
||||
}
|
||||
|
||||
func markOutboxRetry(row models.AlertOutbox, msg string) {
|
||||
retry := row.RetryCount + 1
|
||||
const maxRetry = 5
|
||||
if retry > maxRetry {
|
||||
markOutboxDead(row.ID, retry, msg)
|
||||
return
|
||||
}
|
||||
backoff := time.Duration(retry*retry) * time.Second
|
||||
if backoff > 60*time.Second {
|
||||
backoff = 60 * time.Second
|
||||
}
|
||||
_ = impl.DBService.Model(&models.AlertOutbox{}).Where("id = ?", row.ID).Updates(map[string]interface{}{
|
||||
"status": outboxStatusRetrying,
|
||||
"retry_count": retry,
|
||||
"next_retry_at": time.Now().Add(backoff),
|
||||
"last_error": truncateError(msg, 1024),
|
||||
}).Error
|
||||
_ = impl.DBService.Model(&models.LogEvent{}).Where("id = ?", row.LogEventID).Update("dispatch_status", "retrying").Error
|
||||
}
|
||||
|
||||
func markOutboxDead(id uint, retry int, msg string) {
|
||||
_ = impl.DBService.Model(&models.AlertOutbox{}).Where("id = ?", id).Updates(map[string]interface{}{
|
||||
"status": outboxStatusDead,
|
||||
"retry_count": retry,
|
||||
"next_retry_at": time.Now(),
|
||||
"last_error": truncateError(msg, 1024),
|
||||
}).Error
|
||||
var row models.AlertOutbox
|
||||
if err := impl.DBService.Select("log_event_id").First(&row, id).Error; err == nil && row.LogEventID > 0 {
|
||||
_ = impl.DBService.Model(&models.LogEvent{}).Where("id = ?", row.LogEventID).Update("dispatch_status", "dead").Error
|
||||
}
|
||||
}
|
||||
|
||||
func truncateError(s string, n int) string {
|
||||
s = strings.TrimSpace(s)
|
||||
if len(s) <= n {
|
||||
return s
|
||||
}
|
||||
return s[:n]
|
||||
}
|
||||
|
||||
11
internal/ingest/alert_outbox_test.go
Normal file
11
internal/ingest/alert_outbox_test.go
Normal file
@@ -0,0 +1,11 @@
|
||||
package ingest
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestTruncateError(t *testing.T) {
|
||||
got := truncateError(" abcdef ", 3)
|
||||
if got != "abc" {
|
||||
t.Fatalf("unexpected value: %q", got)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -24,6 +24,27 @@ type Engine struct {
|
||||
syslogRules []models.SyslogRule
|
||||
trapRules []models.TrapRule
|
||||
shields []models.TrapShield
|
||||
resourceByIP map[string]resourceRef
|
||||
resourceByHN map[string]resourceRef
|
||||
}
|
||||
|
||||
type resourceRef struct {
|
||||
ResourceType string
|
||||
ResourceID string
|
||||
ResourceName string
|
||||
}
|
||||
|
||||
func resourceTypePriority(resourceType string) int {
|
||||
switch strings.ToLower(strings.TrimSpace(resourceType)) {
|
||||
case "server":
|
||||
return 3
|
||||
case "collector":
|
||||
return 2
|
||||
case "device":
|
||||
return 1
|
||||
default:
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
var Global = &Engine{}
|
||||
@@ -33,6 +54,7 @@ func (e *Engine) Refresh() error {
|
||||
var syslog []models.SyslogRule
|
||||
var trap []models.TrapRule
|
||||
var shield []models.TrapShield
|
||||
var mappings []models.ResourceMapping
|
||||
|
||||
if err := impl.DBService.Where("enabled = ?", true).Find(&dict).Error; err != nil {
|
||||
return err
|
||||
@@ -54,12 +76,51 @@ func (e *Engine) Refresh() error {
|
||||
if err := impl.DBService.Where("enabled = ?", true).Find(&shield).Error; err != nil {
|
||||
return err
|
||||
}
|
||||
if err := impl.DBService.Where("is_deleted = ?", false).Order("updated_at desc, id desc").Find(&mappings).Error; err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ipMap := make(map[string]resourceRef)
|
||||
hnMap := make(map[string]resourceRef)
|
||||
for _, m := range mappings {
|
||||
ref := resourceRef{
|
||||
ResourceType: m.ResourceType,
|
||||
ResourceID: m.ResourceID,
|
||||
ResourceName: m.ResourceName,
|
||||
}
|
||||
var ips []string
|
||||
if err := json.Unmarshal([]byte(m.IPsJSON), &ips); err == nil {
|
||||
for _, ip := range ips {
|
||||
key := strings.TrimSpace(ip)
|
||||
if key == "" {
|
||||
continue
|
||||
}
|
||||
if cur, exists := ipMap[key]; !exists || resourceTypePriority(ref.ResourceType) > resourceTypePriority(cur.ResourceType) {
|
||||
ipMap[key] = ref
|
||||
}
|
||||
}
|
||||
}
|
||||
var hostnames []string
|
||||
if err := json.Unmarshal([]byte(m.HostnamesJSON), &hostnames); err == nil {
|
||||
for _, hn := range hostnames {
|
||||
key := strings.ToLower(strings.TrimSpace(hn))
|
||||
if key == "" {
|
||||
continue
|
||||
}
|
||||
if cur, exists := hnMap[key]; !exists || resourceTypePriority(ref.ResourceType) > resourceTypePriority(cur.ResourceType) {
|
||||
hnMap[key] = ref
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
e.mu.Lock()
|
||||
e.trapDict = dict
|
||||
e.syslogRules = syslog
|
||||
e.trapRules = trap
|
||||
e.shields = shield
|
||||
e.resourceByIP = ipMap
|
||||
e.resourceByHN = hnMap
|
||||
e.mu.Unlock()
|
||||
return nil
|
||||
}
|
||||
@@ -99,14 +160,21 @@ func (e *Engine) HandleSyslog(addr *net.UDPAddr, payload []byte) {
|
||||
detailBytes, _ := json.Marshal(detailObj)
|
||||
summary := formatSyslogSummary(parsed)
|
||||
sev := syslogPriorityToSeverity(parsed.Priority)
|
||||
ref, method := e.resolveResource(addr.IP.String(), device)
|
||||
|
||||
ev := models.LogEvent{
|
||||
SourceKind: "syslog",
|
||||
RemoteAddr: addr.String(),
|
||||
SourceIP: addr.IP.String(),
|
||||
RawPayload: string(payload),
|
||||
NormalizedSummary: summary,
|
||||
NormalizedDetail: string(detailBytes),
|
||||
DeviceName: device,
|
||||
ResourceType: ref.ResourceType,
|
||||
ResourceID: ref.ResourceID,
|
||||
ResourceName: ref.ResourceName,
|
||||
MatchMethod: method,
|
||||
DispatchStatus: "not_applicable",
|
||||
SeverityCode: sev,
|
||||
}
|
||||
|
||||
@@ -166,8 +234,8 @@ func (e *Engine) HandleSyslog(addr *net.UDPAddr, payload []byte) {
|
||||
PolicyID: matched.PolicyID,
|
||||
RawData: rawBytes,
|
||||
}
|
||||
if err := forwardAlert(body); err == nil {
|
||||
_ = impl.DBService.Model(&ev).Update("alert_sent", true).Error
|
||||
if err := enqueueAlert(ev.ID, body); err == nil {
|
||||
_ = impl.DBService.Model(&ev).Update("dispatch_status", "pending").Error
|
||||
}
|
||||
}
|
||||
|
||||
@@ -204,10 +272,7 @@ func trapShielded(e *Engine, addr *net.UDPAddr, trapOID string, pkt *gosnmp.Snmp
|
||||
if !s.Enabled {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(s.SourceIPCIDR) == "" {
|
||||
continue
|
||||
}
|
||||
if !ipMatchesCIDR(ip, s.SourceIPCIDR) {
|
||||
if cidr := strings.TrimSpace(s.SourceIPCIDR); cidr != "" && !ipMatchesCIDR(ip, cidr) {
|
||||
continue
|
||||
}
|
||||
if p := strings.TrimSpace(s.OIDPrefix); p != "" && !strings.HasPrefix(normOID(trapOID), normOID(p)) {
|
||||
@@ -265,14 +330,21 @@ func (e *Engine) HandleTrap(addr *net.UDPAddr, pkt *gosnmp.SnmpPacket) {
|
||||
}
|
||||
}
|
||||
detailBytes, _ := json.Marshal(detailObj)
|
||||
ref, method := e.resolveResource(addr.IP.String(), addr.IP.String())
|
||||
|
||||
ev := models.LogEvent{
|
||||
SourceKind: "snmp_trap",
|
||||
RemoteAddr: addr.String(),
|
||||
SourceIP: addr.IP.String(),
|
||||
RawPayload: fp,
|
||||
NormalizedSummary: readable,
|
||||
NormalizedDetail: string(detailBytes),
|
||||
DeviceName: addr.IP.String(),
|
||||
ResourceType: ref.ResourceType,
|
||||
ResourceID: ref.ResourceID,
|
||||
ResourceName: ref.ResourceName,
|
||||
MatchMethod: method,
|
||||
DispatchStatus: "not_applicable",
|
||||
SeverityCode: sev,
|
||||
TrapOID: trapOID,
|
||||
}
|
||||
@@ -360,8 +432,8 @@ func (e *Engine) HandleTrap(addr *net.UDPAddr, pkt *gosnmp.SnmpPacket) {
|
||||
PolicyID: matched.PolicyID,
|
||||
RawData: rawBytes,
|
||||
}
|
||||
if err := forwardAlert(body); err == nil {
|
||||
_ = impl.DBService.Model(&ev).Update("alert_sent", true).Error
|
||||
if err := enqueueAlert(ev.ID, body); err == nil {
|
||||
_ = impl.DBService.Model(&ev).Update("dispatch_status", "pending").Error
|
||||
}
|
||||
}
|
||||
|
||||
@@ -440,3 +512,18 @@ func firstNonEmpty(a, b string) string {
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func (e *Engine) resolveResource(sourceIP, hostname string) (resourceRef, string) {
|
||||
e.mu.RLock()
|
||||
ipMap := e.resourceByIP
|
||||
hnMap := e.resourceByHN
|
||||
e.mu.RUnlock()
|
||||
|
||||
if ref, ok := ipMap[strings.TrimSpace(sourceIP)]; ok {
|
||||
return ref, "ip"
|
||||
}
|
||||
if ref, ok := hnMap[strings.ToLower(strings.TrimSpace(hostname))]; ok {
|
||||
return ref, "hostname"
|
||||
}
|
||||
return resourceRef{}, "none"
|
||||
}
|
||||
|
||||
49
internal/ingest/resource_resolver_test.go
Normal file
49
internal/ingest/resource_resolver_test.go
Normal file
@@ -0,0 +1,49 @@
|
||||
package ingest
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestResolveResourceByIPFirst(t *testing.T) {
|
||||
e := &Engine{
|
||||
resourceByIP: map[string]resourceRef{
|
||||
"10.0.0.10": {ResourceType: "server", ResourceID: "srv-10", ResourceName: "s10"},
|
||||
},
|
||||
resourceByHN: map[string]resourceRef{
|
||||
"host-a": {ResourceType: "device", ResourceID: "dev-a", ResourceName: "a"},
|
||||
},
|
||||
}
|
||||
ref, method := e.resolveResource("10.0.0.10", "host-a")
|
||||
if method != "ip" {
|
||||
t.Fatalf("method=%s", method)
|
||||
}
|
||||
if ref.ResourceID != "srv-10" {
|
||||
t.Fatalf("resource id=%s", ref.ResourceID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveResourceByHostname(t *testing.T) {
|
||||
e := &Engine{
|
||||
resourceByIP: map[string]resourceRef{},
|
||||
resourceByHN: map[string]resourceRef{
|
||||
"host-a": {ResourceType: "device", ResourceID: "dev-a", ResourceName: "a"},
|
||||
},
|
||||
}
|
||||
ref, method := e.resolveResource("10.0.0.20", "HOST-A")
|
||||
if method != "hostname" {
|
||||
t.Fatalf("method=%s", method)
|
||||
}
|
||||
if ref.ResourceID != "dev-a" {
|
||||
t.Fatalf("resource id=%s", ref.ResourceID)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveResourceNoMatch(t *testing.T) {
|
||||
e := &Engine{
|
||||
resourceByIP: map[string]resourceRef{},
|
||||
resourceByHN: map[string]resourceRef{},
|
||||
}
|
||||
_, method := e.resolveResource("10.0.0.20", "host-b")
|
||||
if method != "none" {
|
||||
t.Fatalf("method=%s", method)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ func inTimeWindows(now time.Time, jsonStr string) bool {
|
||||
}
|
||||
var windows []timeWindow
|
||||
if err := json.Unmarshal([]byte(s), &windows); err != nil || len(windows) == 0 {
|
||||
return true
|
||||
return false
|
||||
}
|
||||
tod := now.Hour()*60 + now.Minute()
|
||||
wd := int(now.Weekday())
|
||||
|
||||
@@ -46,8 +46,20 @@ func parseSyslogPayload(payload []byte) ParsedSyslog {
|
||||
tokens := strings.SplitN(rest, " ", 3)
|
||||
if len(tokens) >= 2 {
|
||||
if len(tokens) >= 3 && isMonthAbbr(tokens[0]) {
|
||||
p.Hostname = tokens[2]
|
||||
if idx := strings.Index(rest, ": "); idx > 0 {
|
||||
parts := strings.Fields(rest)
|
||||
if len(parts) >= 4 && isDayOfMonth(parts[1]) && isHHMMSS(parts[2]) {
|
||||
p.Hostname = parts[3]
|
||||
if len(parts) > 4 {
|
||||
tagMsg := strings.Join(parts[4:], " ")
|
||||
if idx := strings.Index(tagMsg, ": "); idx > 0 {
|
||||
p.Tag = tagMsg[:idx]
|
||||
p.Message = strings.TrimSpace(tagMsg[idx+2:])
|
||||
} else {
|
||||
p.Message = tagMsg
|
||||
}
|
||||
}
|
||||
} else if idx := strings.Index(rest, ": "); idx > 0 {
|
||||
// 兼容无法严格按 RFC3164 切分的历史格式。
|
||||
p.Message = strings.TrimSpace(rest[idx+2:])
|
||||
}
|
||||
} else {
|
||||
@@ -66,6 +78,28 @@ func parseSyslogPayload(payload []byte) ParsedSyslog {
|
||||
return p
|
||||
}
|
||||
|
||||
func isDayOfMonth(s string) bool {
|
||||
n, err := strconv.Atoi(s)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return n >= 1 && n <= 31
|
||||
}
|
||||
|
||||
func isHHMMSS(s string) bool {
|
||||
parts := strings.Split(s, ":")
|
||||
if len(parts) != 3 {
|
||||
return false
|
||||
}
|
||||
h, err1 := strconv.Atoi(parts[0])
|
||||
m, err2 := strconv.Atoi(parts[1])
|
||||
sec, err3 := strconv.Atoi(parts[2])
|
||||
if err1 != nil || err2 != nil || err3 != nil {
|
||||
return false
|
||||
}
|
||||
return h >= 0 && h <= 23 && m >= 0 && m <= 59 && sec >= 0 && sec <= 59
|
||||
}
|
||||
|
||||
func isMonthAbbr(s string) bool {
|
||||
if len(s) < 3 {
|
||||
return false
|
||||
|
||||
@@ -2,7 +2,12 @@ package ingest
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.apinb.com/ops/logs/internal/models"
|
||||
"github.com/gosnmp/gosnmp"
|
||||
)
|
||||
|
||||
func TestParseSyslogPayloadPri(t *testing.T) {
|
||||
@@ -12,6 +17,19 @@ func TestParseSyslogPayloadPri(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSyslogPayloadRFC3164Hostname(t *testing.T) {
|
||||
p := parseSyslogPayload([]byte("Oct 11 22:14:15 mymachine su: failed"))
|
||||
if p.Hostname != "mymachine" {
|
||||
t.Fatalf("hostname=%q", p.Hostname)
|
||||
}
|
||||
if p.Tag != "su" {
|
||||
t.Fatalf("tag=%q", p.Tag)
|
||||
}
|
||||
if p.Message != "failed" {
|
||||
t.Fatalf("message=%q", p.Message)
|
||||
}
|
||||
}
|
||||
|
||||
func TestForwardAlertBodyIncludesRawData(t *testing.T) {
|
||||
raw := []byte(`{"source":"syslog","parsed":{}}`)
|
||||
b := AlertReceiveBody{
|
||||
@@ -30,3 +48,29 @@ func TestForwardAlertBodyIncludesRawData(t *testing.T) {
|
||||
t.Fatalf("raw_data %s", dec["raw_data"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestInTimeWindowsInvalidJSONReturnsFalse(t *testing.T) {
|
||||
now := time.Date(2026, 1, 1, 10, 0, 0, 0, time.Local)
|
||||
if inTimeWindows(now, "{invalid") {
|
||||
t.Fatal("invalid json should not be treated as always effective")
|
||||
}
|
||||
}
|
||||
|
||||
func TestTrapShieldedAllowsEmptySourceIPCIDR(t *testing.T) {
|
||||
e := &Engine{
|
||||
shields: []models.TrapShield{
|
||||
{
|
||||
Enabled: true,
|
||||
SourceIPCIDR: "",
|
||||
OIDPrefix: "1.3.6.1.4.1",
|
||||
InterfaceHint: "",
|
||||
TimeWindowsJSON: "",
|
||||
},
|
||||
},
|
||||
}
|
||||
addr := &net.UDPAddr{IP: net.ParseIP("10.0.0.1"), Port: 162}
|
||||
pkt := &gosnmp.SnmpPacket{}
|
||||
if !trapShielded(e, addr, "1.3.6.1.4.1.999", pkt) {
|
||||
t.Fatal("shield should match when source_ip_cidr is empty and other conditions match")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user