diff --git a/docs/docs/content/bounces.md b/docs/docs/content/bounces.md index 922f64f0..b0c967f2 100644 --- a/docs/docs/content/bounces.md +++ b/docs/docs/content/bounces.md @@ -14,6 +14,9 @@ Configure the bounce mailbox in Settings -> Bounces. Either the "From" e-mail th Some mail servers may also return the bounce to the `Reply-To` address, which can also be added to the header settings. +### Bounce classification +listmonk applies a series of heuristics to the bounced mail body to guess whether it is a 'soft' bounce or a 'hard' bounce. For instance, it looks for SMTP status codes (4.x.x is treated as soft, 5.x.x as hard) and for common strings such as "mailbox not found". If none of the heuristics match, the bounce is considered 'soft' by default. + ## Webhook API The bounce webhook API can be used to record bounce events with custom scripting. This could be by reading a mailbox, a database, or mail server logs. @@ -22,14 +25,14 @@ The bounce webhook API can be used to record bounce events with custom scripting | `POST` | /webhooks/bounce | Record a bounce event. | -| Name | Type | Required | Description | -| ----------------| --------- | -----------| ------------------------------------------------------------------------------------ | -| subscriber_uuid | string | | The UUID of the subscriber. Either this or `email` is required. | -| email | string | | The e-mail of the subscriber. Either this or `subscriber_uuid` is required. | -| campaign_uuid | string | | UUID of the campaign for which the bounce happened. | -| source | string | Yes | A string indicating the source, eg: `api`, `my_script` etc. | -| type | string | Yes | `hard` or `soft` bounce. Currently, this has no effect on how the bounce is treated. | -| meta | string | | An optional escaped JSON string with arbitrary metadata about the bounce event. 
| +| Name | Type | Required | Description | +| --------------- | ------ | -------- | ------------------------------------------------------------------------------------ | +| subscriber_uuid | string | | The UUID of the subscriber. Either this or `email` is required. | +| email | string | | The e-mail of the subscriber. Either this or `subscriber_uuid` is required. | +| campaign_uuid | string | | UUID of the campaign for which the bounce happened. | +| source | string | Yes | A string indicating the source, eg: `api`, `my_script` etc. | +| type | string | Yes | `hard` or `soft` bounce. Currently, this has no effect on how the bounce is treated. | +| meta | string | | An optional escaped JSON string with arbitrary metadata about the bounce event. | ```shell @@ -43,11 +46,11 @@ curl -u 'api_username:access_token' -X POST 'http://localhost:9000/webhooks/boun listmonk supports receiving bounce webhook events from the following SMTP providers. | Endpoint | Description | More info | -|:--------------------------------------------------------------|:---------------------------------------|:----------------------------------------------------------------------------------------------------------------------| +| :------------------------------------------------------------ | :------------------------------------- | :-------------------------------------------------------------------------------------------------------------------- | | `https://listmonk.yoursite.com/webhooks/service/ses` | Amazon (AWS) SES | See below | | `https://listmonk.yoursite.com/webhooks/service/sendgrid` | Sendgrid / Twilio Signed event webhook | [More info](https://docs.sendgrid.com/for-developers/tracking-events/getting-started-event-webhook-security-features) | | `https://listmonk.yoursite.com/webhooks/service/postmark` | Postmark webhook | [More info](https://postmarkapp.com/developer/webhooks/webhooks-overview) | -| `https://listmonk.yoursite.com/webhooks/service/forwardemail` | Forward Email 
webhook | [More info](https://forwardemail.net/en/faq#do-you-support-bounce-webhooks) | +| `https://listmonk.yoursite.com/webhooks/service/forwardemail` | Forward Email webhook | [More info](https://forwardemail.net/en/faq#do-you-support-bounce-webhooks) | ## Amazon Simple Email Service (SES) diff --git a/internal/bounce/bounce.go b/internal/bounce/bounce.go index 9a49b108..a990f6e0 100644 --- a/internal/bounce/bounce.go +++ b/internal/bounce/bounce.go @@ -125,6 +125,7 @@ func (m *Manager) Run() { // runMailboxScanner runs a blocking loop that scans the mailbox at given intervals. func (m *Manager) runMailboxScanner() { for { + m.log.Printf("scanning bounce mailbox %s", m.opt.Mailbox.Host) if err := m.mailbox.Scan(1000, m.queue); err != nil { m.log.Printf("error scanning bounce mailbox: %v", err) } diff --git a/internal/bounce/mailbox/pop.go b/internal/bounce/mailbox/pop.go index 6520ea03..511c003d 100644 --- a/internal/bounce/mailbox/pop.go +++ b/internal/bounce/mailbox/pop.go @@ -2,6 +2,7 @@ package mailbox import ( "encoding/json" + "fmt" "io" "regexp" "strings" @@ -24,6 +25,15 @@ type bounceHeaders struct { Regexp *regexp.Regexp } +type bounceMeta struct { + From string `json:"from"` + Subject string `json:"subject"` + MessageID string `json:"message_id"` + DeliveredTo string `json:"delivered_to"` + Received []string `json:"received"` + ClassifyReason string `json:"classify_reason"` +} + var ( // List of header to look for in the e-mail body, regexp to fall back to if the header is empty. headerLookups = []bounceHeaders{ @@ -37,6 +47,14 @@ var ( } reHdrReceived = regexp.MustCompile(`(?m)(?:^` + models.EmailHeaderReceived + `:\s+?)(.*)`) + + // SMTP status code (5.x.x or 4.x.x) to classify hard/soft bounces. + reSMTPStatus = regexp.MustCompile(`(?m)(?i)^(?:Status:\s*)?(?:\d{3}\s+)?([45]\.\d+\.\d+)`) + + // List of (conventional) strings to guess hard bounces. 
+	reHardBounce = regexp.MustCompile(`(?i)(NXDOMAIN|user unknown|address not found|mailbox not found|address.*reject|does not exist|` +
+		`invalid recipient|no such user|recipient.*invalid|undeliverable|permanent.*failure|permanent.*error|` +
+		`bad.*address|unknown.*user|account.*disabled|address.*disabled)`)
 )
 
 // NewPOP returns a new instance of the POP mailbox client.
@@ -52,6 +70,35 @@ func NewPOP(opt Opt) *POP {
 	}
 }
 
+// classifyBounce inspects the raw bounce message b and guesses whether it is a
+// hard or a soft bounce. Heuristics are applied in order:
+//
+//  1. The first SMTP status code matched by reSMTPStatus decides the type:
+//     5.x.x is a hard bounce, 4.x.x is a soft bounce.
+//  2. Otherwise, a match against the reHardBounce keyword list is a hard bounce.
+//  3. Otherwise, the bounce is treated as soft.
+//
+// It returns the bounce type along with a short reason describing what matched
+// ("smtp_status=<code>", "body_match=<keyword>" or "default").
+func classifyBounce(b []byte) (string, string) {
+	// reSMTPStatus only captures codes starting with 4 or 5, so the first match
+	// settles the classification and later matches never need to be examined.
+	if m := reSMTPStatus.FindSubmatch(b); m != nil {
+		reason := fmt.Sprintf("smtp_status=%s", m[1])
+		if m[1][0] == '5' {
+			return models.BounceTypeHard, reason
+		}
+		return models.BounceTypeSoft, reason
+	}
+
+	// Check for explicit hard bounce keywords.
+	if m := reHardBounce.FindSubmatch(b); m != nil {
+		return models.BounceTypeHard, fmt.Sprintf("body_match=%s", m[1])
+	}
+
+	return models.BounceTypeSoft, "default"
+}
+
 // Scan scans the mailbox and pushes the downloaded messages into the given channel.
 // The messages that are downloaded are deleted from the server. If limit > 0,
 // all messages on the server are downloaded and deleted.
@@ -147,24 +194,22 @@ func (p *POP) Scan(limit int, ch chan models.Bounce) error {
 			date = time.Now()
 		}
 
+		// Classify the bounce type based on message content.
+		bounceType, bounceReason := classifyBounce(b.Bytes())
+
 		// Additional bounce e-mail metadata.
-		meta, _ := json.Marshal(struct {
-			From        string   `json:"from"`
-			Subject     string   `json:"subject"`
-			MessageID   string   `json:"message_id"`
-			DeliveredTo string   `json:"delivered_to"`
-			Received    []string `json:"received"`
-		}{
-			From:        hdr[models.EmailHeaderFrom],
-			Subject:     hdr[models.EmailHeaderSubject],
-			MessageID:   hdr[models.EmailHeaderMessageId],
-			DeliveredTo: hdr[models.EmailHeaderDeliveredTo],
-			Received:    msgReceived,
+		meta, _ := json.Marshal(bounceMeta{
+			From:           hdr[models.EmailHeaderFrom],
+			Subject:        hdr[models.EmailHeaderSubject],
+			MessageID:      hdr[models.EmailHeaderMessageId],
+			DeliveredTo:    hdr[models.EmailHeaderDeliveredTo],
+			Received:       msgReceived,
+			ClassifyReason: bounceReason,
 		})
 
 		select {
 		case ch <- models.Bounce{
-			Type:           "hard",
+			Type:           bounceType,
 			CampaignUUID:   hdr[models.EmailHeaderCampaignUUID],
 			SubscriberUUID: hdr[models.EmailHeaderSubscriberUUID],
 			Source:         p.opt.Host,