1// Package queue is in charge of outgoing messages, queueing them when submitted,
2// attempting a first delivery over SMTP, retrying with backoff and sending DSNs
3// for delayed or failed deliveries.
4package queue
5
6import (
7 "bytes"
8 "context"
9 "errors"
10 "fmt"
11 "io"
12 "log/slog"
13 "net"
14 "os"
15 "path/filepath"
16 "runtime/debug"
17 "slices"
18 "strings"
19 "time"
20
21 "golang.org/x/net/proxy"
22
23 "github.com/prometheus/client_golang/prometheus"
24 "github.com/prometheus/client_golang/prometheus/promauto"
25
26 "github.com/mjl-/bstore"
27
28 "github.com/mjl-/mox/config"
29 "github.com/mjl-/mox/dns"
30 "github.com/mjl-/mox/dsn"
31 "github.com/mjl-/mox/metrics"
32 "github.com/mjl-/mox/mlog"
33 "github.com/mjl-/mox/mox-"
34 "github.com/mjl-/mox/moxio"
35 "github.com/mjl-/mox/moxvar"
36 "github.com/mjl-/mox/smtp"
37 "github.com/mjl-/mox/smtpclient"
38 "github.com/mjl-/mox/store"
39 "github.com/mjl-/mox/tlsrpt"
40 "github.com/mjl-/mox/tlsrptdb"
41 "github.com/mjl-/mox/webapi"
42 "github.com/mjl-/mox/webhook"
43)
44
// ErrFromID indicates a fromid was present when adding a message to the queue,
// but it wasn't unique.
var ErrFromID = errors.New("fromid not unique")
48
// Prometheus metrics for the outgoing queue, created (and registered) through
// promauto.
var (
	// metricConnection counts outgoing queue client connections by result.
	metricConnection = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "mox_queue_connection_total",
			Help: "Queue client connections, outgoing.",
		},
		[]string{
			"result", // "ok", "timeout", "canceled", "error"
		},
	)
	// metricDelivery is a histogram of the duration, in seconds, of SMTP client
	// delivery attempts to a single host.
	metricDelivery = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "mox_queue_delivery_duration_seconds",
			Help:    "SMTP client delivery attempt to single host.",
			Buckets: []float64{0.01, 0.05, 0.100, 0.5, 1, 5, 10, 20, 30, 60, 120},
		},
		[]string{
			"attempt",   // Number of attempts.
			"transport", // empty for default direct delivery.
			"tlsmode",   // immediate, requiredstarttls, opportunistic, skip (from smtpclient.TLSMode), with optional +mtasts and/or +dane.
			"result",    // ok, timeout, canceled, temperror, permerror, error
		},
	)
	// metricHold is the number of queued messages that are on hold. It is set to
	// the full current count by metricHoldUpdate.
	metricHold = promauto.NewGauge(
		prometheus.GaugeOpts{
			Name: "mox_queue_hold",
			Help: "Messages in queue that are on hold.",
		},
	)
)
79
// jitter is a pseudo-random source from mox.NewPseudoRand.
// NOTE(review): presumably used to randomize retry scheduling; its use is not
// visible in this part of the file.
var jitter = mox.NewPseudoRand()

// DBTypes are the types stored in the queue database, passed to bstore.Open by
// Init.
var DBTypes = []any{Msg{}, HoldRule{}, MsgRetired{}, webapi.Suppression{}, Hook{}, HookRetired{}} // Types stored in DB.

// DB is the queue database, opened by Init and closed by Shutdown.
var DB *bstore.DB // Exported for making backups.

// FutureReleaseIntervalMax is the maximum interval (60 days) from the time of
// submission for which delayed delivery can be requested.
const FutureReleaseIntervalMax = 60 * 24 * time.Hour

// Localserve is set for mox localserve, to prevent queueing.
var Localserve bool
90
// HoldRule is a set of conditions that cause a matching message to be marked as on
// hold when it is queued. All-empty conditions match all messages, effectively
// pausing the entire queue.
type HoldRule struct {
	ID                 int64      // Identifies the rule. HoldRuleAdd resets it to 0 before inserting.
	Account            string     // Compared against Msg.SenderAccount.
	SenderDomain       dns.Domain // Source for SenderDomainStr.
	RecipientDomain    dns.Domain // Source for RecipientDomainStr.
	SenderDomainStr    string     // Unicode. Set by HoldRuleAdd from SenderDomain.Name().
	RecipientDomainStr string     // Unicode. Set by HoldRuleAdd from RecipientDomain.Name().
}
102
103func (pr HoldRule) All() bool {
104 pr.ID = 0
105 return pr == HoldRule{}
106}
107
108func (pr HoldRule) matches(m Msg) bool {
109 return pr.All() || pr.Account == m.SenderAccount || pr.SenderDomainStr == m.SenderDomainStr || pr.RecipientDomainStr == m.RecipientDomainStr
110}
111
// Msg is a message in the queue.
//
// Use MakeMsg to make a message with fields that Add needs. Add will further set
// queueing related fields.
type Msg struct {
	ID int64

	// A message for multiple recipients will get a BaseID that is identical to the
	// first Msg.ID queued. The message contents will be identical for each recipient,
	// including MsgPrefix. If other properties are identical too, including recipient
	// domain, multiple Msgs may be delivered in a single SMTP transaction. For
	// messages with a single recipient, this field will be 0.
	BaseID int64 `bstore:"index"`

	Queued             time.Time      `bstore:"default now"`
	Hold               bool           // If set, delivery won't be attempted.
	SenderAccount      string         // Failures are delivered back to this local account. Also used for routing.
	SenderLocalpart    smtp.Localpart // Should be a local user and domain.
	SenderDomain       dns.IPDomain
	SenderDomainStr    string         // For filtering, unicode.
	FromID             string         // For transactional messages, used to match later DSNs.
	RecipientLocalpart smtp.Localpart // Typically a remote user and domain.
	RecipientDomain    dns.IPDomain
	RecipientDomainStr string              // For filtering, unicode domain. Can also contain ip enclosed in [].
	Attempts           int                 // Next attempt is based on last attempt and exponential back off based on attempts.
	MaxAttempts        int                 // Max number of attempts before giving up. If 0, then the default of 8 attempts is used instead.
	DialedIPs          map[string][]net.IP // For each host, the IPs that were dialed. Used for IP selection for later attempts.
	NextAttempt        time.Time           // For scheduling.
	LastAttempt        *time.Time
	Results            []MsgResult // Delivery results, appended/updated by markResult.

	Has8bit       bool   // Whether message contains bytes with high bit set, determines whether 8BITMIME SMTP extension is needed.
	SMTPUTF8      bool   // Whether message requires use of SMTPUTF8.
	IsDMARCReport bool   // Delivery failures for DMARC reports are handled differently.
	IsTLSReport   bool   // Delivery failures for TLS reports are handled differently.
	Size          int64  // Full size of message, combined MsgPrefix with contents of message file.
	MessageID     string // Message-ID header, including <>. Used when composing a DSN, in its References header.
	MsgPrefix     []byte // Data to send before the contents from the file, typically with headers like DKIM-Signature.
	Subject       string // For context about delivery.

	// If set, this message is a DSN and this is a version using utf-8, for the case
	// the remote MTA supports smtputf8. In this case, Size and MsgPrefix are not
	// relevant.
	DSNUTF8 []byte

	// If non-empty, the transport to use for this message. Can be set through cli or
	// admin interface. If empty (the default for a submitted message), regular routing
	// rules apply.
	Transport string

	// RequireTLS influences TLS verification during delivery.
	//
	// If nil, the recipient domain policy is followed (MTA-STS and/or DANE), falling
	// back to optional opportunistic non-verified STARTTLS.
	//
	// If RequireTLS is true (through SMTP REQUIRETLS extension or webmail submit),
	// MTA-STS or DANE is required, as well as REQUIRETLS support by the next hop
	// server.
	//
	// If RequireTLS is false (through message header "TLS-Required: No"), the recipient
	// domain's policy is ignored if it does not lead to a successful TLS connection,
	// i.e. falling back to SMTP delivery with unverified STARTTLS or plain text.
	RequireTLS *bool
	// ../rfc/8689:250

	// For DSNs, where the original FUTURERELEASE value must be included as per-message
	// field. This field should be of the form "for;" plus interval, or "until;" plus
	// utc date-time.
	FutureReleaseRequest string
	// ../rfc/4865:305

	Extra map[string]string // Extra information, for transactional email.
}
185
// MsgResult is the result (or work in progress) of a delivery attempt.
type MsgResult struct {
	Start    time.Time     // Time the result entry was created.
	Duration time.Duration // Time since Start, set by markResult when the result is recorded.
	Success  bool
	Code     int    // NOTE(review): presumably the SMTP response code — confirm against callers of markResult.
	Secode   string // NOTE(review): presumably the SMTP enhanced status code — confirm.
	Error    string // Error message, or resultErrorDelivering while delivery is in progress.
	// todo: store smtp trace for failed deliveries for debugging, perhaps also for successful deliveries.
}
196
// resultErrorDelivering is stored in MsgResult.Error while delivery is in
// progress. It is replaced after success/error: markResult overwrites the last
// result if it still carries this marker, and appends a new result otherwise.
const resultErrorDelivering = "delivering..."
199
200// markResult updates/adds a delivery result.
201func (m *Msg) markResult(code int, secode string, errmsg string, success bool) {
202 if len(m.Results) == 0 || m.Results[len(m.Results)-1].Error != resultErrorDelivering {
203 m.Results = append(m.Results, MsgResult{Start: time.Now()})
204 }
205 result := &m.Results[len(m.Results)-1]
206 result.Duration = time.Since(result.Start)
207 result.Code = code
208 result.Secode = secode
209 result.Error = errmsg
210 result.Success = success
211}
212
213// LastResult returns the last result entry, or an empty result.
214func (m *Msg) LastResult() MsgResult {
215 if len(m.Results) == 0 {
216 return MsgResult{Start: time.Now()}
217 }
218 return m.Results[len(m.Results)-1]
219}
220
221// Sender of message as used in MAIL FROM.
222func (m Msg) Sender() smtp.Path {
223 return smtp.Path{Localpart: m.SenderLocalpart, IPDomain: m.SenderDomain}
224}
225
226// Recipient of message as used in RCPT TO.
227func (m Msg) Recipient() smtp.Path {
228 return smtp.Path{Localpart: m.RecipientLocalpart, IPDomain: m.RecipientDomain}
229}
230
231// MessagePath returns the path where the message is stored.
232func (m Msg) MessagePath() string {
233 return mox.DataDirPath(filepath.Join("queue", store.MessagePath(m.ID)))
234}
235
236// todo: store which transport (if any) was actually used in MsgResult, based on routes.
237
238// Retired returns a MsgRetired for the message, for history of deliveries.
239func (m Msg) Retired(success bool, t, keepUntil time.Time) MsgRetired {
240 return MsgRetired{
241 ID: m.ID,
242 BaseID: m.BaseID,
243 Queued: m.Queued,
244 SenderAccount: m.SenderAccount,
245 SenderLocalpart: m.SenderLocalpart,
246 SenderDomainStr: m.SenderDomainStr,
247 FromID: m.FromID,
248 RecipientLocalpart: m.RecipientLocalpart,
249 RecipientDomain: m.RecipientDomain,
250 RecipientDomainStr: m.RecipientDomainStr,
251 Attempts: m.Attempts,
252 MaxAttempts: m.MaxAttempts,
253 DialedIPs: m.DialedIPs,
254 LastAttempt: m.LastAttempt,
255 Results: m.Results,
256 Has8bit: m.Has8bit,
257 SMTPUTF8: m.SMTPUTF8,
258 IsDMARCReport: m.IsDMARCReport,
259 IsTLSReport: m.IsTLSReport,
260 Size: m.Size,
261 MessageID: m.MessageID,
262 Subject: m.Subject,
263 Transport: m.Transport,
264 RequireTLS: m.RequireTLS,
265 FutureReleaseRequest: m.FutureReleaseRequest,
266 Extra: m.Extra,
267
268 RecipientAddress: smtp.Path{Localpart: m.RecipientLocalpart, IPDomain: m.RecipientDomain}.XString(true),
269 Success: success,
270 LastActivity: t,
271 KeepUntil: keepUntil,
272 }
273}
274
// MsgRetired is a message for which delivery completed, either successfully or
// failed/canceled. Retired messages are only stored if so configured, and will be
// cleaned up after the configured period.
type MsgRetired struct {
	ID int64 // Same ID as it was as Msg.ID.

	BaseID             int64
	Queued             time.Time
	SenderAccount      string         // Failures are delivered back to this local account. Also used for routing.
	SenderLocalpart    smtp.Localpart // Should be a local user and domain.
	SenderDomainStr    string         // For filtering, unicode. An IP address is enclosed in [], see Sender.
	FromID             string         `bstore:"index"` // Used to match DSNs.
	RecipientLocalpart smtp.Localpart // Typically a remote user and domain.
	RecipientDomain    dns.IPDomain
	RecipientDomainStr string              // For filtering, unicode.
	Attempts           int                 // Next attempt is based on last attempt and exponential back off based on attempts.
	MaxAttempts        int                 // Max number of attempts before giving up. If 0, then the default of 8 attempts is used instead.
	DialedIPs          map[string][]net.IP // For each host, the IPs that were dialed. Used for IP selection for later attempts.
	LastAttempt        *time.Time
	Results            []MsgResult

	Has8bit       bool   // Whether message contains bytes with high bit set, determines whether 8BITMIME SMTP extension is needed.
	SMTPUTF8      bool   // Whether message requires use of SMTPUTF8.
	IsDMARCReport bool   // Delivery failures for DMARC reports are handled differently.
	IsTLSReport   bool   // Delivery failures for TLS reports are handled differently.
	Size          int64  // Full size of message, combined MsgPrefix with contents of message file.
	MessageID     string // Used when composing a DSN, in its References header.
	Subject       string // For context about delivery.

	Transport            string
	RequireTLS           *bool
	FutureReleaseRequest string

	Extra map[string]string // Extra information, for transactional email.

	LastActivity     time.Time `bstore:"index"`
	RecipientAddress string    `bstore:"index RecipientAddress+LastActivity"` // Recipient path as string, set by Msg.Retired.
	Success          bool      // Whether delivery to next hop succeeded.
	KeepUntil        time.Time `bstore:"index"`
}
315
316// Sender of message as used in MAIL FROM.
317func (m MsgRetired) Sender() (path smtp.Path, err error) {
318 path.Localpart = m.RecipientLocalpart
319 if strings.HasPrefix(m.SenderDomainStr, "[") && strings.HasSuffix(m.SenderDomainStr, "]") {
320 s := m.SenderDomainStr[1 : len(m.SenderDomainStr)-1]
321 path.IPDomain.IP = net.ParseIP(s)
322 if path.IPDomain.IP == nil {
323 err = fmt.Errorf("parsing ip address %q: %v", s, err)
324 }
325 } else {
326 path.IPDomain.Domain, err = dns.ParseDomain(m.SenderDomainStr)
327 }
328 return
329}
330
331// Recipient of message as used in RCPT TO.
332func (m MsgRetired) Recipient() smtp.Path {
333 return smtp.Path{Localpart: m.RecipientLocalpart, IPDomain: m.RecipientDomain}
334}
335
336// LastResult returns the last result entry, or an empty result.
337func (m MsgRetired) LastResult() MsgResult {
338 if len(m.Results) == 0 {
339 return MsgResult{}
340 }
341 return m.Results[len(m.Results)-1]
342}
343
344// Init opens the queue database without starting delivery.
345func Init() error {
346 qpath := mox.DataDirPath(filepath.FromSlash("queue/index.db"))
347 os.MkdirAll(filepath.Dir(qpath), 0770)
348 isNew := false
349 if _, err := os.Stat(qpath); err != nil && os.IsNotExist(err) {
350 isNew = true
351 }
352
353 var err error
354 log := mlog.New("queue", nil)
355 opts := bstore.Options{Timeout: 5 * time.Second, Perm: 0660, RegisterLogger: moxvar.RegisterLogger(qpath, log.Logger)}
356 DB, err = bstore.Open(mox.Shutdown, qpath, &opts, DBTypes...)
357 if err == nil {
358 err = DB.Read(mox.Shutdown, func(tx *bstore.Tx) error {
359 return metricHoldUpdate(tx)
360 })
361 }
362 if err != nil {
363 if isNew {
364 os.Remove(qpath)
365 }
366 return fmt.Errorf("open queue database: %s", err)
367 }
368 return nil
369}
370
371// When we update the gauge, we just get the full current value, not try to account
372// for adds/removes.
373func metricHoldUpdate(tx *bstore.Tx) error {
374 count, err := bstore.QueryTx[Msg](tx).FilterNonzero(Msg{Hold: true}).Count()
375 if err != nil {
376 return fmt.Errorf("querying messages on hold for metric: %v", err)
377 }
378 metricHold.Set(float64(count))
379 return nil
380}
381
382// Shutdown closes the queue database. The delivery process isn't stopped. For tests only.
383func Shutdown() {
384 err := DB.Close()
385 if err != nil {
386 mlog.New("queue", nil).Errorx("closing queue db", err)
387 }
388 DB = nil
389}
390
391// todo: the filtering & sorting can use improvements. too much duplicated code (variants between {Msg,Hook}{,Retired}. Sort has pagination fields, some untyped.
392
// Filter filters messages to list or operate on. Used by admin web interface
// and cli.
//
// Only non-empty/non-zero values are applied to the filter. Leaving all fields
// empty/zero matches all messages.
type Filter struct {
	Max         int     // If non-zero, the maximum number of messages to match.
	IDs         []int64 // If non-empty, only messages with these IDs match.
	Account     string  // Matched against Msg.SenderAccount.
	From        string  // Substring of the sender address. If both From and To are set, a message matching either one is selected.
	To          string  // Substring of the recipient address.
	Hold        *bool
	Submitted   string // Whether submitted before/after a time relative to now. ">$duration" or "<$duration", also with "now" for duration.
	NextAttempt string // ">$duration" or "<$duration", also with "now" for duration.
	Transport   *string
}
409
410func (f Filter) apply(q *bstore.Query[Msg]) error {
411 if len(f.IDs) > 0 {
412 q.FilterIDs(f.IDs)
413 }
414 applyTime := func(field string, s string) error {
415 orig := s
416 var before bool
417 if strings.HasPrefix(s, "<") {
418 before = true
419 } else if !strings.HasPrefix(s, ">") {
420 return fmt.Errorf(`must start with "<" for before or ">" for after a duration`)
421 }
422 s = strings.TrimSpace(s[1:])
423 var t time.Time
424 if s == "now" {
425 t = time.Now()
426 } else if d, err := time.ParseDuration(s); err != nil {
427 return fmt.Errorf("parsing duration %q: %v", orig, err)
428 } else {
429 t = time.Now().Add(d)
430 }
431 if before {
432 q.FilterLess(field, t)
433 } else {
434 q.FilterGreater(field, t)
435 }
436 return nil
437 }
438 if f.Hold != nil {
439 q.FilterEqual("Hold", *f.Hold)
440 }
441 if f.Submitted != "" {
442 if err := applyTime("Queued", f.Submitted); err != nil {
443 return fmt.Errorf("applying filter for submitted: %v", err)
444 }
445 }
446 if f.NextAttempt != "" {
447 if err := applyTime("NextAttempt", f.NextAttempt); err != nil {
448 return fmt.Errorf("applying filter for next attempt: %v", err)
449 }
450 }
451 if f.Account != "" {
452 q.FilterNonzero(Msg{SenderAccount: f.Account})
453 }
454 if f.Transport != nil {
455 q.FilterEqual("Transport", *f.Transport)
456 }
457 if f.From != "" || f.To != "" {
458 q.FilterFn(func(m Msg) bool {
459 return f.From != "" && strings.Contains(m.Sender().XString(true), f.From) || f.To != "" && strings.Contains(m.Recipient().XString(true), f.To)
460 })
461 }
462 if f.Max != 0 {
463 q.Limit(f.Max)
464 }
465 return nil
466}
467
// Sort specifies the ordering of listed messages, and optionally the position
// after which to continue listing (pagination).
type Sort struct {
	Field  string // "Queued" or "NextAttempt"/"".
	LastID int64  // If > 0, we return objects beyond this, less/greater depending on Asc.
	Last   any    // Value of Field for last object. Must be set iff LastID is set. Must be a string with an RFC 3339 time.
	Asc    bool   // Ascending, or descending.
}
474
475func (s Sort) apply(q *bstore.Query[Msg]) error {
476 switch s.Field {
477 case "", "NextAttempt":
478 s.Field = "NextAttempt"
479 case "Queued":
480 s.Field = "Queued"
481 default:
482 return fmt.Errorf("unknown sort order field %q", s.Field)
483 }
484
485 if s.LastID > 0 {
486 ls, ok := s.Last.(string)
487 if !ok {
488 return fmt.Errorf("last should be string with time, not %T %q", s.Last, s.Last)
489 }
490 last, err := time.Parse(time.RFC3339Nano, ls)
491 if err != nil {
492 last, err = time.Parse(time.RFC3339, ls)
493 }
494 if err != nil {
495 return fmt.Errorf("parsing last %q as time: %v", s.Last, err)
496 }
497 q.FilterNotEqual("ID", s.LastID)
498 var fieldEqual func(m Msg) bool
499 if s.Field == "NextAttempt" {
500 fieldEqual = func(m Msg) bool { return m.NextAttempt.Equal(last) }
501 } else {
502 fieldEqual = func(m Msg) bool { return m.Queued.Equal(last) }
503 }
504 if s.Asc {
505 q.FilterGreaterEqual(s.Field, last)
506 q.FilterFn(func(m Msg) bool {
507 return !fieldEqual(m) || m.ID > s.LastID
508 })
509 } else {
510 q.FilterLessEqual(s.Field, last)
511 q.FilterFn(func(m Msg) bool {
512 return !fieldEqual(m) || m.ID < s.LastID
513 })
514 }
515 }
516 if s.Asc {
517 q.SortAsc(s.Field, "ID")
518 } else {
519 q.SortDesc(s.Field, "ID")
520 }
521 return nil
522}
523
524// List returns max 100 messages matching filter in the delivery queue.
525// By default, orders by next delivery attempt.
526func List(ctx context.Context, filter Filter, sort Sort) ([]Msg, error) {
527 q := bstore.QueryDB[Msg](ctx, DB)
528 if err := filter.apply(q); err != nil {
529 return nil, err
530 }
531 if err := sort.apply(q); err != nil {
532 return nil, err
533 }
534 qmsgs, err := q.List()
535 if err != nil {
536 return nil, err
537 }
538 return qmsgs, nil
539}
540
541// Count returns the number of messages in the delivery queue.
542func Count(ctx context.Context) (int, error) {
543 return bstore.QueryDB[Msg](ctx, DB).Count()
544}
545
546// HoldRuleList returns all hold rules.
547func HoldRuleList(ctx context.Context) ([]HoldRule, error) {
548 return bstore.QueryDB[HoldRule](ctx, DB).List()
549}
550
551// HoldRuleAdd adds a new hold rule causing newly submitted messages to be marked
552// as "on hold", and existing matching messages too.
553func HoldRuleAdd(ctx context.Context, log mlog.Log, hr HoldRule) (HoldRule, error) {
554 var n int
555 err := DB.Write(ctx, func(tx *bstore.Tx) error {
556 hr.ID = 0
557 hr.SenderDomainStr = hr.SenderDomain.Name()
558 hr.RecipientDomainStr = hr.RecipientDomain.Name()
559 if err := tx.Insert(&hr); err != nil {
560 return err
561 }
562 log.Info("adding hold rule", slog.Any("holdrule", hr))
563
564 q := bstore.QueryTx[Msg](tx)
565 if !hr.All() {
566 q.FilterNonzero(Msg{
567 SenderAccount: hr.Account,
568 SenderDomainStr: hr.SenderDomainStr,
569 RecipientDomainStr: hr.RecipientDomainStr,
570 })
571 }
572 var err error
573 n, err = q.UpdateField("Hold", true)
574 if err != nil {
575 return fmt.Errorf("marking existing matching messages in queue on hold: %v", err)
576 }
577 return metricHoldUpdate(tx)
578 })
579 if err != nil {
580 return HoldRule{}, err
581 }
582 log.Info("marked messages in queue as on hold", slog.Int("messages", n))
583 msgqueueKick()
584 return hr, nil
585}
586
587// HoldRuleRemove removes a hold rule. The Hold field of existing messages are not
588// changed.
589func HoldRuleRemove(ctx context.Context, log mlog.Log, holdRuleID int64) error {
590 return DB.Write(ctx, func(tx *bstore.Tx) error {
591 hr := HoldRule{ID: holdRuleID}
592 if err := tx.Get(&hr); err != nil {
593 return err
594 }
595 log.Info("removing hold rule", slog.Any("holdrule", hr))
596 return tx.Delete(HoldRule{ID: holdRuleID})
597 })
598}
599
600// MakeMsg is a convenience function that sets the commonly used fields for a Msg.
601// messageID should include <>.
602func MakeMsg(sender, recipient smtp.Path, has8bit, smtputf8 bool, size int64, messageID string, prefix []byte, requireTLS *bool, next time.Time, subject string) Msg {
603 return Msg{
604 SenderLocalpart: sender.Localpart,
605 SenderDomain: sender.IPDomain,
606 RecipientLocalpart: recipient.Localpart,
607 RecipientDomain: recipient.IPDomain,
608 Has8bit: has8bit,
609 SMTPUTF8: smtputf8,
610 Size: size,
611 MessageID: messageID,
612 MsgPrefix: prefix,
613 Subject: subject,
614 RequireTLS: requireTLS,
615 Queued: time.Now(),
616 NextAttempt: next,
617 }
618}
619
620// Add one or more new messages to the queue. If the sender paths and MsgPrefix are
621// identical, they'll get the same BaseID, so they can be delivered in a single
622// SMTP transaction, with a single DATA command, but may be split into multiple
623// transactions if errors/limits are encountered. The queue is kicked immediately
624// to start a first delivery attempt.
625//
626// ID of the messagse must be 0 and will be set after inserting in the queue.
627//
628// Add sets derived fields like SenderDomainStr and RecipientDomainStr, and fields
629// related to queueing, such as Queued, NextAttempt.
630func Add(ctx context.Context, log mlog.Log, senderAccount string, msgFile *os.File, qml ...Msg) error {
631 if len(qml) == 0 {
632 return fmt.Errorf("must queue at least one message")
633 }
634
635 base := true
636
637 for i, qm := range qml {
638 if qm.ID != 0 {
639 return fmt.Errorf("id of queued messages must be 0")
640 }
641 // Sanity check, internal consistency.
642 qml[i].SenderDomainStr = formatIPDomain(qm.SenderDomain)
643 qml[i].RecipientDomainStr = formatIPDomain(qm.RecipientDomain)
644 if base && i > 0 && qm.Sender().String() != qml[0].Sender().String() || !bytes.Equal(qm.MsgPrefix, qml[0].MsgPrefix) {
645 base = false
646 }
647 }
648
649 tx, err := DB.Begin(ctx, true)
650 if err != nil {
651 return fmt.Errorf("begin transaction: %w", err)
652 }
653 defer func() {
654 if tx != nil {
655 if err := tx.Rollback(); err != nil {
656 log.Errorx("rollback for queue", err)
657 }
658 }
659 }()
660
661 // Mark messages Hold if they match a hold rule.
662 holdRules, err := bstore.QueryTx[HoldRule](tx).List()
663 if err != nil {
664 return fmt.Errorf("getting queue hold rules")
665 }
666
667 // Insert messages into queue. If multiple messages are to be delivered in a single
668 // transaction, they all get a non-zero BaseID that is the Msg.ID of the first
669 // message inserted.
670 var baseID int64
671 for i := range qml {
672 // FromIDs must be unique if present. We don't have a unique index because values
673 // can be the empty string. We check in both Msg and MsgRetired, both are relevant
674 // for uniquely identifying a message sent in the past.
675 if fromID := qml[i].FromID; fromID != "" {
676 if exists, err := bstore.QueryTx[Msg](tx).FilterNonzero(Msg{FromID: fromID}).Exists(); err != nil {
677 return fmt.Errorf("looking up fromid: %v", err)
678 } else if exists {
679 return fmt.Errorf("%w: fromid %q already present in message queue", ErrFromID, fromID)
680 }
681 if exists, err := bstore.QueryTx[MsgRetired](tx).FilterNonzero(MsgRetired{FromID: fromID}).Exists(); err != nil {
682 return fmt.Errorf("looking up fromid: %v", err)
683 } else if exists {
684 return fmt.Errorf("%w: fromid %q already present in retired message queue", ErrFromID, fromID)
685 }
686 }
687
688 qml[i].SenderAccount = senderAccount
689 qml[i].BaseID = baseID
690 for _, hr := range holdRules {
691 if hr.matches(qml[i]) {
692 qml[i].Hold = true
693 break
694 }
695 }
696 if err := tx.Insert(&qml[i]); err != nil {
697 return err
698 }
699 if base && i == 0 && len(qml) > 1 {
700 baseID = qml[i].ID
701 qml[i].BaseID = baseID
702 if err := tx.Update(&qml[i]); err != nil {
703 return err
704 }
705 }
706 }
707
708 var paths []string
709 defer func() {
710 for _, p := range paths {
711 err := os.Remove(p)
712 log.Check(err, "removing destination message file for queue", slog.String("path", p))
713 }
714 }()
715
716 for _, qm := range qml {
717 dst := qm.MessagePath()
718 paths = append(paths, dst)
719 dstDir := filepath.Dir(dst)
720 os.MkdirAll(dstDir, 0770)
721 if err := moxio.LinkOrCopy(log, dst, msgFile.Name(), nil, true); err != nil {
722 return fmt.Errorf("linking/copying message to new file: %s", err)
723 } else if err := moxio.SyncDir(log, dstDir); err != nil {
724 return fmt.Errorf("sync directory: %v", err)
725 }
726 }
727
728 for _, m := range qml {
729 if m.Hold {
730 if err := metricHoldUpdate(tx); err != nil {
731 return err
732 }
733 break
734 }
735 }
736
737 if err := tx.Commit(); err != nil {
738 return fmt.Errorf("commit transaction: %s", err)
739 }
740 tx = nil
741 paths = nil
742
743 msgqueueKick()
744
745 return nil
746}
747
748func formatIPDomain(d dns.IPDomain) string {
749 if len(d.IP) > 0 {
750 return "[" + d.IP.String() + "]"
751 }
752 return d.Domain.Name()
753}
754
var (
	// msgqueue receives a signal from msgqueueKick. With a buffer of one, at most
	// a single signal is pending.
	msgqueue = make(chan struct{}, 1)
	// NOTE(review): deliveryResults is not used in this part of the file;
	// presumably delivery attempts report on it — confirm.
	deliveryResults = make(chan string, 1)
)
759
// kick signals both the message queue and the hook queue.
func kick() {
	msgqueueKick()
	hookqueueKick()
}
764
// msgqueueKick signals the message queue, without blocking: if a signal is
// already pending on the channel, this is a no-op.
func msgqueueKick() {
	select {
	case msgqueue <- struct{}{}:
	default:
	}
}
771
772// NextAttemptAdd adds a duration to the NextAttempt for all matching messages, and
773// kicks the queue.
774func NextAttemptAdd(ctx context.Context, filter Filter, d time.Duration) (affected int, err error) {
775 err = DB.Write(ctx, func(tx *bstore.Tx) error {
776 q := bstore.QueryTx[Msg](tx)
777 if err := filter.apply(q); err != nil {
778 return err
779 }
780 msgs, err := q.List()
781 if err != nil {
782 return fmt.Errorf("listing matching messages: %v", err)
783 }
784 for _, m := range msgs {
785 m.NextAttempt = m.NextAttempt.Add(d)
786 if err := tx.Update(&m); err != nil {
787 return err
788 }
789 }
790 affected = len(msgs)
791 return nil
792 })
793 if err != nil {
794 return 0, err
795 }
796 msgqueueKick()
797 return affected, nil
798}
799
800// NextAttemptSet sets NextAttempt for all matching messages to a new time, and
801// kicks the queue.
802func NextAttemptSet(ctx context.Context, filter Filter, t time.Time) (affected int, err error) {
803 q := bstore.QueryDB[Msg](ctx, DB)
804 if err := filter.apply(q); err != nil {
805 return 0, err
806 }
807 n, err := q.UpdateNonzero(Msg{NextAttempt: t})
808 if err != nil {
809 return 0, fmt.Errorf("selecting and updating messages in queue: %v", err)
810 }
811 msgqueueKick()
812 return n, nil
813}
814
815// HoldSet sets Hold for all matching messages and kicks the queue.
816func HoldSet(ctx context.Context, filter Filter, hold bool) (affected int, err error) {
817 err = DB.Write(ctx, func(tx *bstore.Tx) error {
818 q := bstore.QueryTx[Msg](tx)
819 if err := filter.apply(q); err != nil {
820 return err
821 }
822 n, err := q.UpdateFields(map[string]any{"Hold": hold})
823 if err != nil {
824 return fmt.Errorf("selecting and updating messages in queue: %v", err)
825 }
826 affected = n
827 return metricHoldUpdate(tx)
828 })
829 if err != nil {
830 return 0, err
831 }
832 msgqueueKick()
833 return affected, nil
834}
835
836// TransportSet changes the transport to use for the matching messages.
837func TransportSet(ctx context.Context, filter Filter, transport string) (affected int, err error) {
838 q := bstore.QueryDB[Msg](ctx, DB)
839 if err := filter.apply(q); err != nil {
840 return 0, err
841 }
842 n, err := q.UpdateFields(map[string]any{"Transport": transport})
843 if err != nil {
844 return 0, fmt.Errorf("selecting and updating messages in queue: %v", err)
845 }
846 msgqueueKick()
847 return n, nil
848}
849
850// Fail marks matching messages as failed for delivery, delivers a DSN to the
851// sender, and sends a webhook.
852//
853// Returns number of messages removed, which can be non-zero even in case of an
854// error.
855func Fail(ctx context.Context, log mlog.Log, f Filter) (affected int, err error) {
856 return failDrop(ctx, log, f, true)
857}
858
859// Drop removes matching messages from the queue. Messages are added as retired
860// message, webhooks with the "canceled" event are queued.
861//
862// Returns number of messages removed, which can be non-zero even in case of an
863// error.
864func Drop(ctx context.Context, log mlog.Log, f Filter) (affected int, err error) {
865 return failDrop(ctx, log, f, false)
866}
867
868func failDrop(ctx context.Context, log mlog.Log, filter Filter, fail bool) (affected int, err error) {
869 var msgs []Msg
870 err = DB.Write(ctx, func(tx *bstore.Tx) error {
871 q := bstore.QueryTx[Msg](tx)
872 if err := filter.apply(q); err != nil {
873 return err
874 }
875 var err error
876 msgs, err = q.List()
877 if err != nil {
878 return fmt.Errorf("getting messages to delete: %v", err)
879 }
880
881 if len(msgs) == 0 {
882 return nil
883 }
884
885 now := time.Now()
886 var remoteMTA dsn.NameIP
887 for i := range msgs {
888 result := MsgResult{
889 Start: now,
890 Error: "delivery canceled by admin",
891 }
892 msgs[i].Results = append(msgs[i].Results, result)
893 if fail {
894 if msgs[i].LastAttempt == nil {
895 msgs[i].LastAttempt = &now
896 }
897 deliverDSNFailure(log, msgs[i], remoteMTA, "", result.Error, nil)
898 }
899 }
900 event := webhook.EventCanceled
901 if fail {
902 event = webhook.EventFailed
903 }
904 if err := retireMsgs(log, tx, event, 0, "", nil, msgs...); err != nil {
905 return fmt.Errorf("removing queue messages from database: %w", err)
906 }
907 return metricHoldUpdate(tx)
908 })
909 if err != nil {
910 return 0, err
911 }
912 if len(msgs) > 0 {
913 if err := removeMsgsFS(log, msgs...); err != nil {
914 return len(msgs), fmt.Errorf("removing queue messages from file system: %w", err)
915 }
916 }
917 kick()
918 return len(msgs), nil
919}
920
921// RequireTLSSet updates the RequireTLS field of matching messages.
922func RequireTLSSet(ctx context.Context, filter Filter, requireTLS *bool) (affected int, err error) {
923 q := bstore.QueryDB[Msg](ctx, DB)
924 if err := filter.apply(q); err != nil {
925 return 0, err
926 }
927 n, err := q.UpdateFields(map[string]any{"RequireTLS": requireTLS})
928 msgqueueKick()
929 return n, err
930}
931
// RetiredFilter filters retired messages to list or operate on. Used by admin
// web interface and cli.
//
// Only non-empty/non-zero values are applied to the filter. Leaving all fields
// empty/zero matches all messages.
type RetiredFilter struct {
	Max          int     // If non-zero, the maximum number of messages to match.
	IDs          []int64 // If non-empty, only messages with these IDs match.
	Account      string  // Matched against MsgRetired.SenderAccount.
	From         string  // Substring of the sender address. If both From and To are set, a message matching either one is selected.
	To           string  // Substring of the recipient address.
	Submitted    string  // Whether submitted before/after a time relative to now. ">$duration" or "<$duration", also with "now" for duration.
	LastActivity string  // ">$duration" or "<$duration", also with "now" for duration.
	Transport    *string
	Success      *bool
}
948
949func (f RetiredFilter) apply(q *bstore.Query[MsgRetired]) error {
950 if len(f.IDs) > 0 {
951 q.FilterIDs(f.IDs)
952 }
953 applyTime := func(field string, s string) error {
954 orig := s
955 var before bool
956 if strings.HasPrefix(s, "<") {
957 before = true
958 } else if !strings.HasPrefix(s, ">") {
959 return fmt.Errorf(`must start with "<" for before or ">" for after a duration`)
960 }
961 s = strings.TrimSpace(s[1:])
962 var t time.Time
963 if s == "now" {
964 t = time.Now()
965 } else if d, err := time.ParseDuration(s); err != nil {
966 return fmt.Errorf("parsing duration %q: %v", orig, err)
967 } else {
968 t = time.Now().Add(d)
969 }
970 if before {
971 q.FilterLess(field, t)
972 } else {
973 q.FilterGreater(field, t)
974 }
975 return nil
976 }
977 if f.Submitted != "" {
978 if err := applyTime("Queued", f.Submitted); err != nil {
979 return fmt.Errorf("applying filter for submitted: %v", err)
980 }
981 }
982 if f.LastActivity != "" {
983 if err := applyTime("LastActivity", f.LastActivity); err != nil {
984 return fmt.Errorf("applying filter for last activity: %v", err)
985 }
986 }
987 if f.Account != "" {
988 q.FilterNonzero(MsgRetired{SenderAccount: f.Account})
989 }
990 if f.Transport != nil {
991 q.FilterEqual("Transport", *f.Transport)
992 }
993 if f.From != "" || f.To != "" {
994 q.FilterFn(func(m MsgRetired) bool {
995 return f.From != "" && strings.Contains(m.SenderLocalpart.String()+"@"+m.SenderDomainStr, f.From) || f.To != "" && strings.Contains(m.Recipient().XString(true), f.To)
996 })
997 }
998 if f.Success != nil {
999 q.FilterEqual("Success", *f.Success)
1000 }
1001 if f.Max != 0 {
1002 q.Limit(f.Max)
1003 }
1004 return nil
1005}
1006
// RetiredSort specifies the order in which retired messages are listed, and
// optionally the last message of a previous listing after which to continue.
//
// When LastID is set, Last must be a string with the time of the last object in
// RFC 3339 format, with or without fractional seconds.
type RetiredSort struct {
	Field  string // "Queued" or "LastActivity"/"".
	LastID int64  // If > 0, we return objects beyond this, less/greater depending on Asc.
	Last   any    // Value of Field for last object. Must be set iff LastID is set.
	Asc    bool   // Ascending, or descending.
}
1013
1014func (s RetiredSort) apply(q *bstore.Query[MsgRetired]) error {
1015 switch s.Field {
1016 case "", "LastActivity":
1017 s.Field = "LastActivity"
1018 case "Queued":
1019 s.Field = "Queued"
1020 default:
1021 return fmt.Errorf("unknown sort order field %q", s.Field)
1022 }
1023
1024 if s.LastID > 0 {
1025 ls, ok := s.Last.(string)
1026 if !ok {
1027 return fmt.Errorf("last should be string with time, not %T %q", s.Last, s.Last)
1028 }
1029 last, err := time.Parse(time.RFC3339Nano, ls)
1030 if err != nil {
1031 last, err = time.Parse(time.RFC3339, ls)
1032 }
1033 if err != nil {
1034 return fmt.Errorf("parsing last %q as time: %v", s.Last, err)
1035 }
1036 q.FilterNotEqual("ID", s.LastID)
1037 var fieldEqual func(m MsgRetired) bool
1038 if s.Field == "LastActivity" {
1039 fieldEqual = func(m MsgRetired) bool { return m.LastActivity.Equal(last) }
1040 } else {
1041 fieldEqual = func(m MsgRetired) bool { return m.Queued.Equal(last) }
1042 }
1043 if s.Asc {
1044 q.FilterGreaterEqual(s.Field, last)
1045 q.FilterFn(func(mr MsgRetired) bool {
1046 return !fieldEqual(mr) || mr.ID > s.LastID
1047 })
1048 } else {
1049 q.FilterLessEqual(s.Field, last)
1050 q.FilterFn(func(mr MsgRetired) bool {
1051 return !fieldEqual(mr) || mr.ID < s.LastID
1052 })
1053 }
1054 }
1055 if s.Asc {
1056 q.SortAsc(s.Field, "ID")
1057 } else {
1058 q.SortDesc(s.Field, "ID")
1059 }
1060 return nil
1061}
1062
1063// RetiredList returns retired messages.
1064func RetiredList(ctx context.Context, filter RetiredFilter, sort RetiredSort) ([]MsgRetired, error) {
1065 q := bstore.QueryDB[MsgRetired](ctx, DB)
1066 if err := filter.apply(q); err != nil {
1067 return nil, err
1068 }
1069 if err := sort.apply(q); err != nil {
1070 return nil, err
1071 }
1072 return q.List()
1073}
1074
// ReadReaderAtCloser combines io.ReadCloser and io.ReaderAt. It is the type
// returned by OpenMessage.
type ReadReaderAtCloser interface {
	io.ReadCloser
	io.ReaderAt
}
1079
1080// OpenMessage opens a message present in the queue.
1081func OpenMessage(ctx context.Context, id int64) (ReadReaderAtCloser, error) {
1082 qm := Msg{ID: id}
1083 err := DB.Get(ctx, &qm)
1084 if err != nil {
1085 return nil, err
1086 }
1087 f, err := os.Open(qm.MessagePath())
1088 if err != nil {
1089 return nil, fmt.Errorf("open message file: %s", err)
1090 }
1091 r := store.FileMsgReader(qm.MsgPrefix, f)
1092 return r, err
1093}
1094
// maxConcurrentDeliveries limits concurrent deliveries: startQueue does not
// launch new work while this many recipient domains are busy, and launchWork
// fetches at most this many messages from the queue per round.
const maxConcurrentDeliveries = 10

// maxConcurrentHookDeliveries is presumably the analogous limit for webhook
// deliveries; it is not used in this part of the file. TODO confirm.
const maxConcurrentHookDeliveries = 10
1097
1098// Start opens the database by calling Init, then starts the delivery and cleanup
1099// processes.
1100func Start(resolver dns.Resolver, done chan struct{}) error {
1101 if err := Init(); err != nil {
1102 return err
1103 }
1104
1105 go startQueue(resolver, done)
1106 go startHookQueue(done)
1107
1108 go cleanupMsgRetired(done)
1109 go cleanupHookRetired(done)
1110
1111 return nil
1112}
1113
1114func cleanupMsgRetired(done chan struct{}) {
1115 log := mlog.New("queue", nil)
1116
1117 defer func() {
1118 x := recover()
1119 if x != nil {
1120 log.Error("unhandled panic in cleanupMsgRetired", slog.Any("x", x))
1121 debug.PrintStack()
1122 metrics.PanicInc(metrics.Queue)
1123 }
1124 }()
1125
1126 timer := time.NewTimer(3 * time.Second)
1127 for {
1128 select {
1129 case <-mox.Shutdown.Done():
1130 done <- struct{}{}
1131 return
1132 case <-timer.C:
1133 }
1134
1135 cleanupMsgRetiredSingle(log)
1136 timer.Reset(time.Hour)
1137 }
1138}
1139
1140func cleanupMsgRetiredSingle(log mlog.Log) {
1141 n, err := bstore.QueryDB[MsgRetired](mox.Shutdown, DB).FilterLess("KeepUntil", time.Now()).Delete()
1142 log.Check(err, "removing old retired messages")
1143 if n > 0 {
1144 log.Debug("cleaned up retired messages", slog.Int("count", n))
1145 }
1146}
1147
1148func startQueue(resolver dns.Resolver, done chan struct{}) {
1149 // High-level delivery strategy advice: ../rfc/5321:3685
1150 log := mlog.New("queue", nil)
1151
1152 // Map keys are either dns.Domain.Name()'s, or string-formatted IP addresses.
1153 busyDomains := map[string]struct{}{}
1154
1155 timer := time.NewTimer(0)
1156
1157 for {
1158 select {
1159 case <-mox.Shutdown.Done():
1160 for len(busyDomains) > 0 {
1161 domain := <-deliveryResults
1162 delete(busyDomains, domain)
1163 }
1164 done <- struct{}{}
1165 return
1166 case <-msgqueue:
1167 case <-timer.C:
1168 case domain := <-deliveryResults:
1169 delete(busyDomains, domain)
1170 }
1171
1172 if len(busyDomains) >= maxConcurrentDeliveries {
1173 continue
1174 }
1175
1176 launchWork(log, resolver, busyDomains)
1177 timer.Reset(nextWork(mox.Shutdown, log, busyDomains))
1178 }
1179}
1180
1181func nextWork(ctx context.Context, log mlog.Log, busyDomains map[string]struct{}) time.Duration {
1182 q := bstore.QueryDB[Msg](ctx, DB)
1183 if len(busyDomains) > 0 {
1184 var doms []any
1185 for d := range busyDomains {
1186 doms = append(doms, d)
1187 }
1188 q.FilterNotEqual("RecipientDomainStr", doms...)
1189 }
1190 q.FilterEqual("Hold", false)
1191 q.SortAsc("NextAttempt")
1192 q.Limit(1)
1193 qm, err := q.Get()
1194 if err == bstore.ErrAbsent {
1195 return 24 * time.Hour
1196 } else if err != nil {
1197 log.Errorx("finding time for next delivery attempt", err)
1198 return 1 * time.Minute
1199 }
1200 return time.Until(qm.NextAttempt)
1201}
1202
1203func launchWork(log mlog.Log, resolver dns.Resolver, busyDomains map[string]struct{}) int {
1204 q := bstore.QueryDB[Msg](mox.Shutdown, DB)
1205 q.FilterLessEqual("NextAttempt", time.Now())
1206 q.FilterEqual("Hold", false)
1207 q.SortAsc("NextAttempt")
1208 q.Limit(maxConcurrentDeliveries)
1209 if len(busyDomains) > 0 {
1210 var doms []any
1211 for d := range busyDomains {
1212 doms = append(doms, d)
1213 }
1214 q.FilterNotEqual("RecipientDomainStr", doms...)
1215 }
1216 var msgs []Msg
1217 seen := map[string]bool{}
1218 err := q.ForEach(func(m Msg) error {
1219 dom := m.RecipientDomainStr
1220 if _, ok := busyDomains[dom]; !ok && !seen[dom] {
1221 seen[dom] = true
1222 msgs = append(msgs, m)
1223 }
1224 return nil
1225 })
1226 if err != nil {
1227 log.Errorx("querying for work in queue", err)
1228 mox.Sleep(mox.Shutdown, 1*time.Second)
1229 return -1
1230 }
1231
1232 for _, m := range msgs {
1233 busyDomains[m.RecipientDomainStr] = struct{}{}
1234 go deliver(log, resolver, m)
1235 }
1236 return len(msgs)
1237}
1238
1239// todo future: we may consider keeping message files around for a while after retiring. especially for failures to deliver. to inspect what exactly wasn't delivered.
1240
1241func removeMsgsFS(log mlog.Log, msgs ...Msg) error {
1242 var errs []string
1243 for _, m := range msgs {
1244 p := mox.DataDirPath(filepath.Join("queue", store.MessagePath(m.ID)))
1245 if err := os.Remove(p); err != nil {
1246 errs = append(errs, fmt.Sprintf("%s: %v", p, err))
1247 }
1248 }
1249 if len(errs) > 0 {
1250 return fmt.Errorf("removing message files from queue: %s", strings.Join(errs, "; "))
1251 }
1252 return nil
1253}
1254
1255// Move one or more messages to retire list or remove it. Webhooks are scheduled.
1256// IDs of msgs in suppressedMsgIDs caused a suppression to be added.
1257//
1258// Callers should update Msg.Results before calling.
1259//
1260// Callers must remove the messages from the file system afterwards, see
1261// removeMsgsFS. Callers must also kick the message and webhook queues.
1262func retireMsgs(log mlog.Log, tx *bstore.Tx, event webhook.OutgoingEvent, code int, secode string, suppressedMsgIDs []int64, msgs ...Msg) error {
1263 now := time.Now()
1264
1265 var hooks []Hook
1266 m0 := msgs[0]
1267 accConf, ok := mox.Conf.Account(m0.SenderAccount)
1268 var hookURL string
1269 if accConf.OutgoingWebhook != nil {
1270 hookURL = accConf.OutgoingWebhook.URL
1271 }
1272 log.Debug("retiring messages from queue", slog.Any("event", event), slog.String("account", m0.SenderAccount), slog.Bool("ok", ok), slog.String("webhookurl", hookURL))
1273 if hookURL != "" && (len(accConf.OutgoingWebhook.Events) == 0 || slices.Contains(accConf.OutgoingWebhook.Events, string(event))) {
1274 for _, m := range msgs {
1275 suppressing := slices.Contains(suppressedMsgIDs, m.ID)
1276 h, err := hookCompose(m, hookURL, accConf.OutgoingWebhook.Authorization, event, suppressing, code, secode)
1277 if err != nil {
1278 log.Errorx("composing webhooks while retiring messages from queue, not queueing hook for message", err, slog.Int64("msgid", m.ID), slog.Any("recipient", m.Recipient()))
1279 } else {
1280 hooks = append(hooks, h)
1281 }
1282 }
1283 }
1284
1285 msgKeep := 24 * 7 * time.Hour
1286 hookKeep := 24 * 7 * time.Hour
1287 if ok {
1288 msgKeep = accConf.KeepRetiredMessagePeriod
1289 hookKeep = accConf.KeepRetiredWebhookPeriod
1290 }
1291
1292 for _, m := range msgs {
1293 if err := tx.Delete(&m); err != nil {
1294 return err
1295 }
1296 }
1297 if msgKeep > 0 {
1298 for _, m := range msgs {
1299 rm := m.Retired(event == webhook.EventDelivered, now, now.Add(msgKeep))
1300 if err := tx.Insert(&rm); err != nil {
1301 return err
1302 }
1303 }
1304 }
1305
1306 for i := range hooks {
1307 if err := hookInsert(tx, &hooks[i], now, hookKeep); err != nil {
1308 return fmt.Errorf("enqueueing webhooks while retiring messages from queue: %v", err)
1309 }
1310 }
1311
1312 if len(hooks) > 0 {
1313 for _, h := range hooks {
1314 log.Debug("queued webhook while retiring message from queue", h.attrs()...)
1315 }
1316 hookqueueKick()
1317 }
1318 return nil
1319}
1320
// deliver attempts to deliver a message.
// The queue is updated, either by removing a delivered or permanently failed
// message, or updating the time for the next attempt. A DSN may be sent.
//
// The work is done in two phases. First, in a single database transaction, the
// message is read again, the attempt is registered (Attempts, LastAttempt,
// NextAttempt and a placeholder result), and the delivery is failed early if the
// domain of the sender is disabled, the recipient is on the suppression list of
// the account, or the transport cannot be found. Other messages with the same
// BaseID and recipient domain that can be delivered under the same conditions
// are gathered for delivery in the same attempt. Second, after commit, the
// messages are handed to deliverLocalserve, deliverSubmit or deliverDirect,
// depending on Localserve and the transport.
//
// When deliver returns, also after a panic, the recipient domain is sent on
// deliveryResults.
func deliver(log mlog.Log, resolver dns.Resolver, m0 Msg) {
	ctx := mox.Shutdown

	qlog := log.WithCid(mox.Cid()).With(
		slog.Any("from", m0.Sender()),
		slog.Int("attempts", m0.Attempts))

	defer func() {
		// Report the recipient domain as finished. startQueue removes it from its set
		// of busy domains when it receives the value.
		deliveryResults <- formatIPDomain(m0.RecipientDomain)

		x := recover()
		if x != nil {
			qlog.Error("deliver panic", slog.Any("panic", x), slog.Int64("msgid", m0.ID), slog.Any("recipient", m0.Recipient()))
			debug.PrintStack()
			metrics.PanicInc(metrics.Queue)
		}
	}()

	// We'll use a single transaction for the various checks, committing as soon as
	// we're done with it.
	xtx, err := DB.Begin(mox.Shutdown, true)
	if err != nil {
		qlog.Errorx("transaction for gathering messages to deliver", err)
		return
	}
	// xtx is set to nil after each commit below, so this rollback only happens on
	// the paths that return without committing.
	defer func() {
		if xtx != nil {
			err := xtx.Rollback()
			qlog.Check(err, "rolling back transaction after error delivering")
		}
	}()

	// We register this attempt by setting LastAttempt, adding an empty Result, and
	// already setting NextAttempt in the future with exponential backoff. If we run
	// into trouble delivery below, at least we won't be bothering the receiving server
	// with our problems.
	// Delivery attempts: immediately, 7.5m, 15m, 30m, 1h, 2h (send delayed DSN), 4h,
	// 8h, 16h (send permanent failure DSN).
	// ../rfc/5321:3703
	// todo future: make the back off times configurable. ../rfc/5321:3713
	now := time.Now()
	var backoff time.Duration
	var origNextAttempt time.Time
	prepare := func() error {
		// Refresh message within transaction.
		m0 = Msg{ID: m0.ID}
		if err := xtx.Get(&m0); err != nil {
			return fmt.Errorf("get message to be delivered: %v", err)
		}

		// Base backoff of 7.5 minutes with a few seconds of jitter, doubled for each
		// earlier attempt.
		backoff = time.Duration(7*60+30+jitter.IntN(10)-5) * time.Second
		for i := 0; i < m0.Attempts; i++ {
			backoff *= time.Duration(2)
		}
		m0.Attempts++
		// Keep the original time, it is used below when gathering more recipients.
		origNextAttempt = m0.NextAttempt
		m0.LastAttempt = &now
		m0.NextAttempt = now.Add(backoff)
		m0.Results = append(m0.Results, MsgResult{Start: now, Error: resultErrorDelivering})
		if err := xtx.Update(&m0); err != nil {
			return fmt.Errorf("update message to be delivered: %v", err)
		}
		return nil
	}
	if err := prepare(); err != nil {
		qlog.Errorx("storing delivery attempt", err, slog.Int64("msgid", m0.ID), slog.Any("recipient", m0.Recipient()))
		return
	}

	var remoteMTA dsn.NameIP // Zero value, will not be included in DSN. ../rfc/3464:1027

	// If domain of sender is currently disabled, fail the delivery attempt.
	if domConf, _ := mox.Conf.Domain(m0.SenderDomain.Domain); domConf.Disabled {
		failMsgsTx(qlog, xtx, []*Msg{&m0}, m0.DialedIPs, backoff, remoteMTA, fmt.Errorf("domain of sender temporarily disabled"))
		err = xtx.Commit()
		qlog.Check(err, "commit processing failure to deliver messages")
		xtx = nil
		kick()
		return
	}

	// Check if recipient is on suppression list. If so, fail delivery.
	path := smtp.Path{Localpart: m0.RecipientLocalpart, IPDomain: m0.RecipientDomain}
	baseAddr := baseAddress(path).XString(true)
	qsup := bstore.QueryTx[webapi.Suppression](xtx)
	qsup.FilterNonzero(webapi.Suppression{Account: m0.SenderAccount, BaseAddress: baseAddr})
	exists, err := qsup.Exists()
	if err != nil || exists {
		if err != nil {
			// The registered attempt is committed below, without marking the message as
			// failed.
			qlog.Errorx("checking whether recipient address is in suppression list", err)
		} else {
			err := fmt.Errorf("not delivering to recipient address %s: %w", path.XString(true), errSuppressed)
			err = smtpclient.Error{Permanent: true, Err: err}
			failMsgsTx(qlog, xtx, []*Msg{&m0}, m0.DialedIPs, backoff, remoteMTA, err)
		}
		err = xtx.Commit()
		qlog.Check(err, "commit processing failure to deliver messages")
		xtx = nil
		kick()
		return
	}

	// resolveTransport returns the name and configuration of the transport to use
	// for mm: the transport set explicitly on the message if any (false is returned
	// if it is not in the configuration), otherwise the transport of the route found
	// for the message.
	resolveTransport := func(mm Msg) (string, config.Transport, bool) {
		if mm.Transport != "" {
			transport, ok := mox.Conf.Static.Transports[mm.Transport]
			if !ok {
				return "", config.Transport{}, false
			}
			return mm.Transport, transport, ok
		}
		route := findRoute(mm.Attempts, mm)
		return route.Transport, route.ResolvedTransport, true
	}

	// Find route for transport to use for delivery attempt.
	// Attempts was already incremented for this attempt. Temporarily undo that, so
	// the route is looked up with the number of earlier attempts, as is done for the
	// additional recipients gathered below.
	m0.Attempts--
	transportName, transport, transportOK := resolveTransport(m0)
	m0.Attempts++
	if !transportOK {
		failMsgsTx(qlog, xtx, []*Msg{&m0}, m0.DialedIPs, backoff, remoteMTA, fmt.Errorf("cannot find transport %q", m0.Transport))
		err = xtx.Commit()
		qlog.Check(err, "commit processing failure to deliver messages")
		xtx = nil
		kick()
		return
	}

	if transportName != "" {
		qlog = qlog.With(slog.String("transport", transportName))
		qlog.Debug("delivering with transport")
	}

	// Attempt to gather more recipients for this identical message, only with the same
	// recipient domain, and under the same conditions (recipientdomain, attempts,
	// requiretls, transport). ../rfc/5321:3759
	msgs := []*Msg{&m0}
	if m0.BaseID != 0 {
		gather := func() error {
			q := bstore.QueryTx[Msg](xtx)
			// The other messages have not been marked as attempted yet, so compare with
			// the number of attempts of m0 before this attempt.
			q.FilterNonzero(Msg{BaseID: m0.BaseID, RecipientDomainStr: m0.RecipientDomainStr, Attempts: m0.Attempts - 1})
			q.FilterNotEqual("ID", m0.ID)
			q.FilterLessEqual("NextAttempt", origNextAttempt)
			q.FilterEqual("Hold", false)
			err := q.ForEach(func(xm Msg) error {
				// RequireTLS must either be unset for both, or set to the same value.
				mrtls := m0.RequireTLS != nil
				xmrtls := xm.RequireTLS != nil
				if mrtls != xmrtls || mrtls && *m0.RequireTLS != *xm.RequireTLS {
					return nil
				}
				tn, _, ok := resolveTransport(xm)
				if ok && tn == transportName {
					msgs = append(msgs, &xm)
				}
				return nil
			})
			if err != nil {
				return fmt.Errorf("looking up more recipients: %v", err)
			}

			// Mark these additional messages as attempted too.
			for _, mm := range msgs[1:] {
				mm.Attempts++
				mm.NextAttempt = m0.NextAttempt
				mm.LastAttempt = m0.LastAttempt
				mm.Results = append(mm.Results, MsgResult{Start: now, Error: resultErrorDelivering})
				if err := xtx.Update(mm); err != nil {
					return fmt.Errorf("updating more message recipients for smtp transaction: %v", err)
				}
			}
			return nil
		}
		if err := gather(); err != nil {
			qlog.Errorx("error finding more recipients for message, will attempt to send to single recipient", err)
			msgs = msgs[:1]
		}
	}

	if err := xtx.Commit(); err != nil {
		qlog.Errorx("commit of preparation to deliver", err, slog.Any("msgid", m0.ID))
		return
	}
	xtx = nil

	if len(msgs) > 1 {
		ids := make([]int64, len(msgs))
		rcpts := make([]smtp.Path, len(msgs))
		for i, m := range msgs {
			ids[i] = m.ID
			rcpts[i] = m.Recipient()
		}
		qlog.Debug("delivering to multiple recipients", slog.Any("msgids", ids), slog.Any("recipients", rcpts))
	} else {
		qlog.Debug("delivering to single recipient", slog.Any("msgid", m0.ID), slog.Any("recipient", m0.Recipient()))
	}

	// With Localserve set, delivery is handled by deliverLocalserve and no
	// transport is used.
	if Localserve {
		deliverLocalserve(ctx, qlog, msgs, backoff)
		return
	}

	// We gather TLS connection successes and failures during delivery, and we store
	// them in tlsrptdb. Every 24 hours we send an email with a report to the recipient
	// domains that opt in via a TLSRPT DNS record. For us, the tricky part is
	// collecting all reporting information. We've got several TLS modes
	// (opportunistic, DANE and/or MTA-STS (PKIX), overrides due to Require TLS).
	// Failures can happen at various levels: MTA-STS policies (apply to whole delivery
	// attempt/domain), MX targets (possibly multiple per delivery attempt, both for
	// MTA-STS and DANE).
	//
	// Once the SMTP client has tried a TLS handshake, we register success/failure,
	// regardless of what happens next on the connection. We also register failures
	// when they happen before we get to the SMTP client, but only if they are related
	// to TLS (and some DNSSEC).
	//
	// The results are only set by deliverDirect below. The deferred function stores
	// them when deliver returns.
	var recipientDomainResult tlsrpt.Result
	var hostResults []tlsrpt.Result
	defer func() {
		if mox.Conf.Static.NoOutgoingTLSReports || m0.RecipientDomain.IsIP() {
			return
		}

		now := time.Now()
		dayUTC := now.UTC().Format("20060102")

		// See if this contains a failure. If not, we'll mark TLS results for delivering
		// DMARC reports SendReport false, so we won't as easily get into a report sending
		// loop.
		var failure bool
		for _, result := range hostResults {
			if result.Summary.TotalFailureSessionCount > 0 {
				failure = true
				break
			}
		}
		if recipientDomainResult.Summary.TotalFailureSessionCount > 0 {
			failure = true
		}

		results := make([]tlsrptdb.TLSResult, 0, 1+len(hostResults))
		tlsaPolicyDomains := map[string]bool{}
		// addResult adds r to results, unless it has no policy type or its policy
		// domain cannot be parsed. Domains of TLSA policies are remembered.
		addResult := func(r tlsrpt.Result, isHost bool) {
			var zerotype tlsrpt.PolicyType
			if r.Policy.Type == zerotype {
				return
			}

			// Ensure we store policy domain in unicode in database.
			policyDomain, err := dns.ParseDomain(r.Policy.Domain)
			if err != nil {
				qlog.Errorx("parsing policy domain for tls result", err, slog.String("policydomain", r.Policy.Domain))
				return
			}

			if r.Policy.Type == tlsrpt.TLSA {
				tlsaPolicyDomains[policyDomain.ASCII] = true
			}

			tlsResult := tlsrptdb.TLSResult{
				PolicyDomain:    policyDomain.Name(),
				DayUTC:          dayUTC,
				RecipientDomain: m0.RecipientDomain.Domain.Name(),
				IsHost:          isHost,
				SendReport:      !m0.IsTLSReport && (!m0.IsDMARCReport || failure),
				Results:         []tlsrpt.Result{r},
			}
			results = append(results, tlsResult)
		}
		for _, result := range hostResults {
			addResult(result, true)
		}
		// If we were delivering to a mail host directly (not a domain with MX records), we
		// are more likely to get a TLSA policy than an STS policy. Don't potentially
		// confuse operators with both a tlsa and no-policy-found result.
		// todo spec: ../rfc/8460:440 an explicit no-sts-policy result would be useful.
		if recipientDomainResult.Policy.Type != tlsrpt.NoPolicyFound || !tlsaPolicyDomains[recipientDomainResult.Policy.Domain] {
			addResult(recipientDomainResult, false)
		}

		if len(results) > 0 {
			err := tlsrptdb.AddTLSResults(context.Background(), results)
			qlog.Check(err, "adding tls results to database for upcoming tlsrpt report")
		}
	}()

	// Deliver with the first transport type that is configured, in the order
	// submissions, submission, smtp. Without any of those, deliver directly,
	// through a SOCKS proxy if configured.
	var dialer smtpclient.Dialer = &net.Dialer{}
	if transport.Submissions != nil {
		deliverSubmit(qlog, resolver, dialer, msgs, backoff, transportName, transport.Submissions, true, 465)
	} else if transport.Submission != nil {
		deliverSubmit(qlog, resolver, dialer, msgs, backoff, transportName, transport.Submission, false, 587)
	} else if transport.SMTP != nil {
		// todo future: perhaps also gather tlsrpt results for submissions.
		deliverSubmit(qlog, resolver, dialer, msgs, backoff, transportName, transport.SMTP, false, 25)
	} else {
		ourHostname := mox.Conf.Static.HostnameDomain
		if transport.Socks != nil {
			socksdialer, err := proxy.SOCKS5("tcp", transport.Socks.Address, nil, &net.Dialer{})
			if err != nil {
				failMsgsDB(qlog, msgs, msgs[0].DialedIPs, backoff, dsn.NameIP{}, fmt.Errorf("socks dialer: %v", err))
				return
			} else if d, ok := socksdialer.(smtpclient.Dialer); !ok {
				failMsgsDB(qlog, msgs, msgs[0].DialedIPs, backoff, dsn.NameIP{}, fmt.Errorf("socks dialer is not a contextdialer"))
				return
			} else {
				dialer = d
			}
			// Use the hostname configured for the SOCKS transport instead of our own.
			ourHostname = transport.Socks.Hostname
		}
		recipientDomainResult, hostResults = deliverDirect(qlog, resolver, dialer, ourHostname, transportName, transport.Direct, msgs, backoff)
	}
}
1633
1634func findRoute(attempt int, m Msg) config.Route {
1635 routesAccount, routesDomain, routesGlobal := mox.Conf.Routes(m.SenderAccount, m.SenderDomain.Domain)
1636 if r, ok := findRouteInList(attempt, m, routesAccount); ok {
1637 return r
1638 }
1639 if r, ok := findRouteInList(attempt, m, routesDomain); ok {
1640 return r
1641 }
1642 if r, ok := findRouteInList(attempt, m, routesGlobal); ok {
1643 return r
1644 }
1645 return config.Route{}
1646}
1647
1648func findRouteInList(attempt int, m Msg, routes []config.Route) (config.Route, bool) {
1649 for _, r := range routes {
1650 if routeMatch(attempt, m, r) {
1651 return r, true
1652 }
1653 }
1654 return config.Route{}, false
1655}
1656
1657func routeMatch(attempt int, m Msg, r config.Route) bool {
1658 return attempt >= r.MinimumAttempts && routeMatchDomain(r.FromDomainASCII, m.SenderDomain.Domain) && routeMatchDomain(r.ToDomainASCII, m.RecipientDomain.Domain)
1659}
1660
1661func routeMatchDomain(l []string, d dns.Domain) bool {
1662 if len(l) == 0 {
1663 return true
1664 }
1665 for _, e := range l {
1666 if d.ASCII == e || strings.HasPrefix(e, ".") && (d.ASCII == e[1:] || strings.HasSuffix(d.ASCII, e)) {
1667 return true
1668 }
1669 }
1670 return false
1671}
1672
1673// Returns string representing delivery result for err, and number of delivered and
1674// failed messages.
1675//
1676// Values: ok, okpartial, timeout, canceled, temperror, permerror, error.
1677func deliveryResult(err error, delivered, failed int) string {
1678 var cerr smtpclient.Error
1679 switch {
1680 case err == nil:
1681 if delivered == 0 {
1682 return "error"
1683 } else if failed > 0 {
1684 return "okpartial"
1685 }
1686 return "ok"
1687 case errors.Is(err, os.ErrDeadlineExceeded), errors.Is(err, context.DeadlineExceeded):
1688 return "timeout"
1689 case errors.Is(err, context.Canceled):
1690 return "canceled"
1691 case errors.As(err, &cerr):
1692 if cerr.Permanent {
1693 return "permerror"
1694 }
1695 return "temperror"
1696 }
1697 return "error"
1698}
1699