1package queue
2
3import (
4 "bytes"
5 "context"
6 "errors"
7 "fmt"
8 "io"
9 "log/slog"
10 "net"
11 "os"
12 "strings"
13 "sync/atomic"
14 "time"
15
16 "github.com/prometheus/client_golang/prometheus"
17 "github.com/prometheus/client_golang/prometheus/promauto"
18
19 "github.com/mjl-/adns"
20 "github.com/mjl-/bstore"
21
22 "github.com/mjl-/mox/config"
23 "github.com/mjl-/mox/dns"
24 "github.com/mjl-/mox/dsn"
25 "github.com/mjl-/mox/mlog"
26 "github.com/mjl-/mox/mox-"
27 "github.com/mjl-/mox/mtasts"
28 "github.com/mjl-/mox/mtastsdb"
29 "github.com/mjl-/mox/smtp"
30 "github.com/mjl-/mox/smtpclient"
31 "github.com/mjl-/mox/store"
32 "github.com/mjl-/mox/tlsrpt"
33 "github.com/mjl-/mox/webhook"
34)
35
36// Increased each time an outgoing connection is made for direct delivery. Used by
37// dnsbl monitoring to pace querying.
38var connectionCounter atomic.Int64
39
40var (
41 metricDestinations = promauto.NewCounter(
42 prometheus.CounterOpts{
43 Name: "mox_queue_destinations_total",
44 Help: "Total destination (e.g. MX) lookups for delivery attempts, including those in mox_smtpclient_destinations_authentic_total.",
45 },
46 )
47 metricDestinationsAuthentic = promauto.NewCounter(
48 prometheus.CounterOpts{
49 Name: "mox_queue_destinations_authentic_total",
50 Help: "Destination (e.g. MX) lookups for delivery attempts authenticated with DNSSEC so they are candidates for DANE verification.",
51 },
52 )
53 metricDestinationDANERequired = promauto.NewCounter(
54 prometheus.CounterOpts{
55 Name: "mox_queue_destination_dane_required_total",
56 Help: "Total number of connections to hosts with valid TLSA records making DANE required.",
57 },
58 )
59 metricDestinationDANESTARTTLSUnverified = promauto.NewCounter(
60 prometheus.CounterOpts{
61 Name: "mox_queue_destination_dane_starttlsunverified_total",
62 Help: "Total number of connections with required DANE where all TLSA records were unusable.",
63 },
64 )
65 metricDestinationDANEGatherTLSAErrors = promauto.NewCounter(
66 prometheus.CounterOpts{
67 Name: "mox_queue_destination_dane_gathertlsa_errors_total",
68 Help: "Total number of connections where looking up TLSA records resulted in an error.",
69 },
70 )
71 // todo: recognize when "tls-required-no" message header caused a non-verifying certificate to be overridden. requires doing our own certificate validation after having set tls.Config.InsecureSkipVerify due to tls-required-no.
72 metricTLSRequiredNoIgnored = promauto.NewCounterVec(
73 prometheus.CounterOpts{
74 Name: "mox_queue_tlsrequiredno_ignored_total",
75 Help: "Delivery attempts with TLS policy findings ignored due to message with TLS-Required: No header. Does not cover case where TLS certificate cannot be PKIX-verified.",
76 },
77 []string{
78 "ignored", // mtastspolicy (error getting policy), mtastsmx (mx host not allowed in policy), badtls (error negotiating tls), badtlsa (error fetching dane tlsa records)
79 },
80 )
81 metricRequireTLSUnsupported = promauto.NewCounterVec(
82 prometheus.CounterOpts{
83 Name: "mox_queue_requiretls_unsupported_total",
84 Help: "Delivery attempts that failed due to message with REQUIRETLS.",
85 },
86 []string{
87 "reason", // nopolicy (no mta-sts and no dane), norequiretls (smtp server does not support requiretls)
88 },
89 )
90 metricPlaintextFallback = promauto.NewCounter(
91 prometheus.CounterOpts{
92 Name: "mox_queue_plaintext_fallback_total",
93 Help: "Delivery attempts with fallback to plain text delivery.",
94 },
95 )
96)
97
98func ConnectionCounter() int64 {
99 return connectionCounter.Load()
100}
101
102type msgResp struct {
103 msg *Msg
104 resp smtpclient.Response
105}
106
107// Delivery by directly dialing (MX) hosts for destination domain of message.
108//
109// The returned results are for use in a TLSRPT report, it holds success/failure
110// counts and failure details for delivery/connection attempts. The
111// recipientDomainResult is for policies/counts/failures about the whole recipient
112// domain (MTA-STS), its policy type can be empty, in which case there is no
113// information (e.g. internal failure). hostResults are per-host details (DANE, one
114// per MX target).
115func deliverDirect(qlog mlog.Log, resolver dns.Resolver, dialer smtpclient.Dialer, ourHostname dns.Domain, transportName string, transportDirect *config.TransportDirect, msgs []*Msg, backoff time.Duration) (recipientDomainResult tlsrpt.Result, hostResults []tlsrpt.Result) {
116 // High-level approach:
117 // - Resolve domain to deliver to (CNAME), and determine hosts to try to deliver to (MX)
118 // - Get MTA-STS policy for domain (optional). If present, only deliver to its
119 // allowlisted hosts and verify TLS against CA pool.
120 // - For each host, attempt delivery. If the attempt results in a permanent failure
121 // (as claimed by remote with a 5xx SMTP response, or perhaps decided by us), the
122 // attempt can be aborted. Other errors are often temporary and may result in later
123 // successful delivery. But hopefully the delivery just succeeds. For each host:
124 // - If there is an MTA-STS policy, we only connect to allow-listed hosts.
125 // - We try to lookup DANE records (optional) and verify them if present.
126 // - If RequireTLS is true, we only deliver if the remote SMTP server implements it.
127 // - If RequireTLS is false, we'll fall back to regular delivery attempts without
128 // TLS verification and possibly without TLS at all, ignoring recipient domain/host
129 // MTA-STS and DANE policies.
130
131 // For convenience, we use m0 to access properties that are shared over all
132 // messages we are delivering.
133 m0 := msgs[0]
134
135 // Resolve domain and hosts to attempt delivery to.
136 // These next-hop names are often the name under which we find MX records. The
137 // expanded name is different from the original if the original was a CNAME,
138 // possibly a chain. If there are no MX records, it can be an IP or the host
139 // directly.
140 origNextHop := m0.RecipientDomain.Domain
141 ctx := mox.Shutdown
142 haveMX, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, hosts, permanent, err := smtpclient.GatherDestinations(ctx, qlog.Logger, resolver, m0.RecipientDomain)
143 if err != nil {
144 // If this is a DNSSEC authentication error, we'll collect it for TLS reporting.
145 // Hopefully it's a temporary misconfiguration that is solve before we try to send
146 // our report. We don't report as "dnssec-invalid", because that is defined as
147 // being for DANE. ../rfc/8460:580
148 var errCode adns.ErrorCode
149 if errors.As(err, &errCode) && errCode.IsAuthentication() {
150 // Result: ../rfc/8460:567
151 reasonCode := fmt.Sprintf("dns-extended-error-%d-%s", errCode, strings.ReplaceAll(errCode.String(), " ", "-"))
152 fd := tlsrpt.Details(tlsrpt.ResultValidationFailure, reasonCode)
153 recipientDomainResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, origNextHop, fd)
154 recipientDomainResult.Summary.TotalFailureSessionCount++
155 }
156 if permanent {
157 err = smtpclient.Error{Permanent: true, Err: err}
158 }
159 failMsgsDB(qlog, msgs, m0.DialedIPs, backoff, dsn.NameIP{}, err)
160 return
161 }
162
163 tlsRequiredNo := m0.RequireTLS != nil && !*m0.RequireTLS
164
165 // Check for MTA-STS policy and enforce it if needed.
166 // We must check at the original next-hop, i.e. recipient domain, not following any
167 // CNAMEs. If we were to follow CNAMEs and ask for MTA-STS at that domain, it
168 // would only take a single CNAME DNS response to direct us to an unrelated domain.
169 var policy *mtasts.Policy // Policy can have mode enforce, testing and none.
170 if !origNextHop.IsZero() {
171 policy, recipientDomainResult, _, err = mtastsdb.Get(ctx, qlog.Logger, resolver, origNextHop)
172 if err != nil {
173 if tlsRequiredNo {
174 qlog.Infox("mtasts lookup temporary error, continuing due to tls-required-no message header", err, slog.Any("domain", origNextHop))
175 metricTLSRequiredNoIgnored.WithLabelValues("mtastspolicy").Inc()
176 } else {
177 qlog.Infox("mtasts lookup temporary error, aborting delivery attempt", err, slog.Any("domain", origNextHop))
178 recipientDomainResult.Summary.TotalFailureSessionCount++
179 failMsgsDB(qlog, msgs, m0.DialedIPs, backoff, dsn.NameIP{}, err)
180 return
181 }
182 }
183 // note: policy can be nil, if a domain does not implement MTA-STS or it's the
184 // first time we fetch the policy and if we encountered an error.
185 }
186
187 // We try delivery to each host until we have success or a permanent failure. So
188 // for transient errors, we'll try the next host. For MX records pointing to a
189 // dual stack host, we turn a permanent failure due to policy on the first delivery
190 // attempt into a temporary failure and make sure to try the other address family
191 // the next attempt. This should reduce issues due to one of our IPs being on a
192 // block list. We won't try multiple IPs of the same address family. Surprisingly,
193 // RFC 5321 does not specify a clear algorithm, but common practice is probably
194 // ../rfc/3974:268.
195 var remoteMTA dsn.NameIP
196 var lastErr = errors.New("no error") // Can be smtpclient.Error.
197 nmissingRequireTLS := 0
198 // todo: should make distinction between host permanently not accepting the message, and the message not being deliverable permanently. e.g. a mx host may have a size limit, or not accept 8bitmime, while another host in the list does accept the message. same for smtputf8, ../rfc/6531:555
199 for _, h := range hosts {
200 // ../rfc/8461:913
201 if policy != nil && policy.Mode != mtasts.ModeNone && !policy.Matches(h.Domain) {
202 // todo: perhaps only send tlsrpt failure if none of the mx hosts matched? reporting about each mismatch seems useful for domain owners, to discover mtasts policies they didn't update after changing mx. there is a risk a domain owner intentionally didn't put all mx'es in the mtasts policy, but they probably won't mind being reported about that.
203 // Other error: Surprising that TLSRPT doesn't have an MTA-STS specific error code
204 // for this case, it's a big part of the reason to have MTA-STS. ../rfc/8460:610
205 // Result: ../rfc/8460:567 todo spec: propose adding a result for this case?
206 fd := tlsrpt.Details(tlsrpt.ResultValidationFailure, "mtasts-policy-mx-mismatch")
207 fd.ReceivingMXHostname = h.Domain.ASCII
208 recipientDomainResult.Add(0, 0, fd)
209
210 var policyHosts []string
211 for _, mx := range policy.MX {
212 policyHosts = append(policyHosts, mx.LogString())
213 }
214 if policy.Mode == mtasts.ModeEnforce {
215 if tlsRequiredNo {
216 qlog.Info("mx host does not match mta-sts policy in mode enforce, ignoring due to tls-required-no message header", slog.Any("host", h.Domain), slog.Any("policyhosts", policyHosts))
217 metricTLSRequiredNoIgnored.WithLabelValues("mtastsmx").Inc()
218 } else {
219 lastErr = fmt.Errorf("mx host %s does not match enforced mta-sts policy with hosts %s", h.Domain, strings.Join(policyHosts, ","))
220 qlog.Error("mx host does not match mta-sts policy in mode enforce, skipping", slog.Any("host", h.Domain), slog.Any("policyhosts", policyHosts))
221 recipientDomainResult.Summary.TotalFailureSessionCount++
222 continue
223 }
224 } else {
225 qlog.Error("mx host does not match mta-sts policy, but it is not enforced, continuing", slog.Any("host", h.Domain), slog.Any("policyhosts", policyHosts))
226 }
227 }
228
229 qlog.Info("delivering to remote", slog.Any("remote", h))
230 nqlog := qlog.WithCid(mox.Cid())
231 var remoteIP net.IP
232
233 enforceMTASTS := policy != nil && policy.Mode == mtasts.ModeEnforce
234 tlsMode := smtpclient.TLSOpportunistic
235 tlsPKIX := false
236 if enforceMTASTS {
237 tlsMode = smtpclient.TLSRequiredStartTLS
238 tlsPKIX = true
239 // note: smtpclient will still go through PKIX verification, and report about it, but not fail the connection if not passing.
240 }
241
242 // Try to deliver to host. We can get various errors back. Like permanent failure
243 // response codes, TCP, DNSSEC, TLS (opportunistic, i.e. optional with fallback to
244 // without), etc. It's a balancing act to handle these situations correctly. We
245 // don't want to bounce unnecessarily. But also not keep trying if there is no
246 // chance of success.
247 //
248 // deliverHost will report generic TLS and MTA-STS-specific failures in
249 // recipientDomainResult. If DANE is encountered, it will add a DANE reporting
250 // result for generic TLS and DANE-specific errors.
251
252 msgResps := make([]*msgResp, len(msgs))
253 for i := range msgs {
254 msgResps[i] = &msgResp{msg: msgs[i]}
255 }
256
257 result := deliverHost(nqlog, resolver, dialer, ourHostname, transportName, transportDirect, h, enforceMTASTS, haveMX, origNextHopAuthentic, origNextHop, expandedNextHopAuthentic, expandedNextHop, msgResps, tlsMode, tlsPKIX, &recipientDomainResult)
258
259 var zerotype tlsrpt.PolicyType
260 if result.hostResult.Policy.Type != zerotype {
261 hostResults = append(hostResults, result.hostResult)
262 }
263
264 // If we had a TLS-related failure when doing TLS, and we don't have a requirement
265 // for MTA-STS/DANE, we try again without TLS. This could be an old server that
266 // only does ancient TLS versions, or has a misconfiguration. Note that
267 // opportunistic TLS does not do regular certificate verification, so that can't be
268 // the problem.
269 // ../rfc/7435:459
270 // We don't fall back to plain text for DMARC reports. ../rfc/7489:1768 ../rfc/7489:2683
271 // We queue outgoing TLS reports with tlsRequiredNo, so reports can be delivered in
272 // case of broken TLS.
273 if result.err != nil && errors.Is(result.err, smtpclient.ErrTLS) && (!enforceMTASTS && tlsMode == smtpclient.TLSOpportunistic && !result.tlsDANE && !m0.IsDMARCReport || tlsRequiredNo) {
274 metricPlaintextFallback.Inc()
275 if tlsRequiredNo {
276 metricTLSRequiredNoIgnored.WithLabelValues("badtls").Inc()
277 }
278
279 // todo future: add a configuration option to not fall back?
280 nqlog.Info("connecting again for delivery attempt without tls",
281 slog.Bool("enforcemtasts", enforceMTASTS),
282 slog.Bool("tlsdane", result.tlsDANE),
283 slog.Any("requiretls", m0.RequireTLS))
284 result = deliverHost(nqlog, resolver, dialer, ourHostname, transportName, transportDirect, h, enforceMTASTS, haveMX, origNextHopAuthentic, origNextHop, expandedNextHopAuthentic, expandedNextHop, msgResps, smtpclient.TLSSkip, false, &tlsrpt.Result{})
285 }
286
287 remoteMTA = dsn.NameIP{Name: h.XString(false), IP: remoteIP}
288 if result.err != nil {
289 lastErr = result.err
290 var cerr smtpclient.Error
291 if errors.As(result.err, &cerr) {
292 if cerr.Secode == smtp.SePol7MissingReqTLS30 {
293 nmissingRequireTLS++
294 }
295 if cerr.Permanent {
296 break
297 }
298 }
299 continue
300 }
301
302 delMsgs := make([]Msg, len(result.delivered))
303 for i, mr := range result.delivered {
304 mqlog := nqlog.With(slog.Int64("msgid", mr.msg.ID), slog.Any("recipient", mr.msg.Recipient()))
305 mqlog.Info("delivered from queue")
306 mr.msg.markResult(mr.resp.Code, mr.resp.Secode, "", true)
307 delMsgs[i] = *mr.msg
308 }
309 if len(delMsgs) > 0 {
310 err := DB.Write(context.Background(), func(tx *bstore.Tx) error {
311 return retireMsgs(nqlog, tx, webhook.EventDelivered, 0, "", nil, delMsgs...)
312 })
313 if err != nil {
314 nqlog.Errorx("deleting messages from queue database after delivery", err)
315 } else if err := removeMsgsFS(nqlog, delMsgs...); err != nil {
316 nqlog.Errorx("removing queued messages from file system after delivery", err)
317 }
318 kick()
319 }
320 if len(result.failed) > 0 {
321 err := DB.Write(context.Background(), func(tx *bstore.Tx) error {
322 for _, mr := range result.failed {
323 failMsgsTx(nqlog, tx, []*Msg{mr.msg}, m0.DialedIPs, backoff, remoteMTA, smtpclient.Error(mr.resp))
324 }
325 return nil
326 })
327 if err != nil {
328 for _, mr := range result.failed {
329 nqlog.Errorx("error processing delivery failure for messages", err,
330 slog.Int64("msgid", mr.msg.ID),
331 slog.Any("recipient", mr.msg.Recipient()))
332 }
333 }
334 kick()
335 }
336 return
337 }
338
339 // In theory, we could make a failure permanent if we didn't find any mx host
340 // matching the mta-sts policy AND the policy is fresh AND all DNS records leading
341 // to the MX targets (including CNAME) have a TTL that is beyond the latest
342 // possible delivery attempt. Until that time, configuration problems can be
343 // corrected through DNS or policy update. Not sure if worth it in practice, there
344 // is a good chance the MX records can still change, at least on initial delivery
345 // failures.
346 // todo: possibly detect that future deliveries will fail due to long ttl's of cached records that are preventing delivery.
347
348 // If we failed due to requiretls not being satisfied, make the delivery permanent.
349 // It is unlikely the recipient domain will implement requiretls during our retry
350 // period. Best to let the sender know immediately.
351 if len(hosts) > 0 && nmissingRequireTLS == len(hosts) {
352 qlog.Info("marking delivery as permanently failed because recipient domain does not implement requiretls")
353 err := smtpclient.Error{
354 Permanent: true,
355 Code: smtp.C554TransactionFailed,
356 Secode: smtp.SePol7MissingReqTLS30,
357 Err: fmt.Errorf("destination servers do not support requiretls"),
358 }
359 failMsgsDB(qlog, msgs, m0.DialedIPs, backoff, remoteMTA, err)
360 return
361 }
362
363 failMsgsDB(qlog, msgs, m0.DialedIPs, backoff, remoteMTA, lastErr)
364 return
365}
366
367type deliverResult struct {
368 tlsDANE bool
369 remoteIP net.IP
370 hostResult tlsrpt.Result
371
372 // If err is set, no messages were delivered but delivered and failed are still
373 // nil. If err is not set, delivered and always add up to all msgs requested to be
374 // sent. All messages can be in failed.
375 delivered []*msgResp
376 failed []*msgResp
377 err error
378}
379
380// deliverHost attempts to deliver msgs to host. All msgs must have the same
381// delivery requirements (e.g. requiretls). Depending on tlsMode we'll do
382// opportunistic or required STARTTLS or skip TLS entirely. Based on tlsPKIX we do
383// PKIX/WebPKI verification (for MTA-STS). If we encounter DANE records, we verify
384// those. If the message has a message header "TLS-Required: No", we ignore TLS
385// verification errors.
386//
387// deliverHost updates DialedIPs of msgs, which must be saved in case of failure to
388// deliver.
389//
390// The haveMX and next-hop-authentic fields are used to determine if DANE is
391// applicable. The next-hop fields themselves are used to determine valid names
392// during DANE TLS certificate verification.
393//
394// The returned hostResult holds TLSRPT reporting results for the connection
395// attempt. Its policy type can be the zero value, indicating there was no finding
396// (e.g. internal error).
397//
398// deliverHost may send a message multiple times: if the server doesn't accept
399// multiple recipients for a message.
400func deliverHost(log mlog.Log, resolver dns.Resolver, dialer smtpclient.Dialer, ourHostname dns.Domain, transportName string, transportDirect *config.TransportDirect, host dns.IPDomain, enforceMTASTS, haveMX, origNextHopAuthentic bool, origNextHop dns.Domain, expandedNextHopAuthentic bool, expandedNextHop dns.Domain, msgResps []*msgResp, tlsMode smtpclient.TLSMode, tlsPKIX bool, recipientDomainResult *tlsrpt.Result) (result deliverResult) {
401 // About attempting delivery to multiple addresses of a host: ../rfc/5321:3898
402
403 m0 := msgResps[0].msg
404 tlsRequiredNo := m0.RequireTLS != nil && !*m0.RequireTLS
405
406 var tlsDANE bool
407 var remoteIP net.IP
408 var hostResult tlsrpt.Result
409 start := time.Now()
410 defer func() {
411 result.tlsDANE = tlsDANE
412 result.remoteIP = remoteIP
413 result.hostResult = hostResult
414
415 mode := string(tlsMode)
416 if tlsPKIX {
417 mode += "+mtasts"
418 }
419 if tlsDANE {
420 mode += "+dane"
421 }
422
423 r := deliveryResult(result.err, len(result.delivered), len(result.failed))
424 d := float64(time.Since(start)) / float64(time.Second)
425 metricDelivery.WithLabelValues(fmt.Sprintf("%d", m0.Attempts), transportName, mode, r).Observe(d)
426
427 log.Debugx("queue deliverhost result", result.err,
428 slog.Any("host", host),
429 slog.String("result", r),
430 slog.Int("delivered", len(result.delivered)),
431 slog.Int("failed", len(result.failed)),
432 slog.Any("tlsmode", tlsMode),
433 slog.Bool("tlspkix", tlsPKIX),
434 slog.Bool("tlsdane", tlsDANE),
435 slog.Bool("tlsrequiredno", tlsRequiredNo),
436 slog.Bool("badtls", result.err != nil && errors.Is(result.err, smtpclient.ErrTLS)),
437 slog.Duration("duration", time.Since(start)))
438 }()
439
440 // Open message to deliver.
441 f, err := os.Open(m0.MessagePath())
442 if err != nil {
443 return deliverResult{err: fmt.Errorf("open message file: %v", err)}
444 }
445 msgr := store.FileMsgReader(m0.MsgPrefix, f)
446 defer func() {
447 err := msgr.Close()
448 log.Check(err, "closing message after delivery attempt")
449 }()
450
451 ctx, cancel := context.WithTimeout(mox.Shutdown, 30*time.Second)
452 defer cancel()
453
454 // We must lookup the IPs for the host name before checking DANE TLSA records. And
455 // only check TLSA records for secure responses. This prevents problems with old
456 // name servers returning an error for TLSA requests or letting it timeout (not
457 // sending a response). ../rfc/7672:879
458 var daneRecords []adns.TLSA
459 var tlsHostnames []dns.Domain
460 if host.IsDomain() {
461 tlsHostnames = []dns.Domain{host.Domain}
462 }
463 for _, mr := range msgResps {
464 if mr.msg.DialedIPs == nil {
465 mr.msg.DialedIPs = map[string][]net.IP{}
466 }
467 }
468
469 countResultFailure := func() {
470 recipientDomainResult.Summary.TotalFailureSessionCount++
471 hostResult.Summary.TotalFailureSessionCount++
472 }
473
474 metricDestinations.Inc()
475 network := "ip"
476 if transportDirect != nil {
477 if network != transportDirect.IPFamily {
478 log.Debug("set custom IP network family for direct transport", slog.Any("network", transportDirect.IPFamily))
479 network = transportDirect.IPFamily
480 }
481 }
482 authentic, expandedAuthentic, expandedHost, ips, dualstack, err := smtpclient.GatherIPs(ctx, log.Logger, resolver, network, host, m0.DialedIPs)
483 destAuthentic := err == nil && authentic && origNextHopAuthentic && (!haveMX || expandedNextHopAuthentic) && host.IsDomain()
484 if !destAuthentic {
485 log.Debugx("not attempting verification with dane", err, slog.Bool("authentic", authentic), slog.Bool("expandedauthentic", expandedAuthentic))
486
487 // Track a DNSSEC error if found.
488 var errCode adns.ErrorCode
489 if err != nil {
490 if errors.As(err, &errCode) && errCode.IsAuthentication() {
491 // Result: ../rfc/8460:567
492 reasonCode := fmt.Sprintf("dns-extended-error-%d-%s", errCode, strings.ReplaceAll(errCode.String(), " ", "-"))
493 fd := tlsrpt.Details(tlsrpt.ResultValidationFailure, reasonCode)
494 hostResult = tlsrpt.MakeResult(tlsrpt.TLSA, host.Domain, fd)
495 countResultFailure()
496 }
497 } else {
498 // todo: we could lookup tlsa records, and log an error when they are not dnssec-signed. this should be interpreted simply as "not doing dane", but it could be useful to warn domain owners about, they may be under the impression they are dane-protected.
499 hostResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, host.Domain)
500 }
501 } else if tlsMode == smtpclient.TLSSkip {
502 metricDestinationsAuthentic.Inc()
503
504 // TLSSkip is used to fallback to plaintext, which is used with a TLS-Required: No
505 // header to ignore the recipient domain's DANE policy.
506
507 // possible err is propagated to below.
508 } else {
509 metricDestinationsAuthentic.Inc()
510
511 // Look for TLSA records in either the expandedHost, or otherwise the original
512 // host. ../rfc/7672:912
513 var tlsaBaseDomain dns.Domain
514 tlsDANE, daneRecords, tlsaBaseDomain, err = smtpclient.GatherTLSA(ctx, log.Logger, resolver, host.Domain, expandedNextHopAuthentic && expandedAuthentic, expandedHost)
515 if tlsDANE {
516 metricDestinationDANERequired.Inc()
517 }
518 if err != nil {
519 metricDestinationDANEGatherTLSAErrors.Inc()
520 }
521 if err == nil && tlsDANE {
522 tlsMode = smtpclient.TLSRequiredStartTLS
523 hostResult = tlsrpt.Result{Policy: tlsrpt.TLSAPolicy(daneRecords, tlsaBaseDomain)}
524 if len(daneRecords) == 0 {
525 // If there are no usable DANE records, we still have to use TLS, but without
526 // verifying its certificate. At least when there is no MTA-STS. Why? Perhaps to
527 // prevent ossification? The SMTP TLSA specification has different behaviour than
528 // the generic TLSA. "Usable" means different things in different places.
529 // ../rfc/7672:718 ../rfc/6698:1845 ../rfc/6698:660
530 log.Debug("no usable dane records, requiring starttls but not verifying with dane")
531 metricDestinationDANESTARTTLSUnverified.Inc()
532 daneRecords = nil
533 // Result: ../rfc/8460:576 (this isn't technicall invalid, only all-unusable...)
534 hostResult.FailureDetails = []tlsrpt.FailureDetails{
535 {
536 ResultType: tlsrpt.ResultTLSAInvalid,
537 ReceivingMXHostname: host.XString(false),
538 FailureReasonCode: "all-unusable-records+ignored",
539 },
540 }
541 } else {
542 log.Debug("delivery with required starttls with dane verification", slog.Any("allowedtlshostnames", tlsHostnames))
543 }
544 // Based on CNAMEs followed and DNSSEC-secure status, we must allow up to 4 host
545 // names.
546 tlsHostnames = smtpclient.GatherTLSANames(haveMX, expandedNextHopAuthentic, expandedAuthentic, origNextHop, expandedNextHop, host.Domain, tlsaBaseDomain)
547 } else if !tlsDANE {
548 log.Debugx("not doing opportunistic dane after gathering tlsa records", err)
549 err = nil
550 hostResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, tlsaBaseDomain)
551 } else if err != nil {
552 fd := tlsrpt.Details(tlsrpt.ResultTLSAInvalid, "")
553 var errCode adns.ErrorCode
554 if errors.As(err, &errCode) {
555 fd.FailureReasonCode = fmt.Sprintf("extended-dns-error-%d-%s", errCode, strings.ReplaceAll(errCode.String(), " ", "-"))
556 if errCode.IsAuthentication() {
557 // Result: ../rfc/8460:580
558 fd.ResultType = tlsrpt.ResultDNSSECInvalid
559 countResultFailure()
560 }
561 }
562 hostResult = tlsrpt.Result{
563 Policy: tlsrpt.TLSAPolicy(daneRecords, tlsaBaseDomain),
564 FailureDetails: []tlsrpt.FailureDetails{fd},
565 }
566
567 if tlsRequiredNo {
568 log.Debugx("error gathering dane tlsa records with dane required, but continuing without validation due to tls-required-no message header", err)
569 err = nil
570 metricTLSRequiredNoIgnored.WithLabelValues("badtlsa").Inc()
571 }
572 }
573 // else, err is propagated below.
574 }
575
576 // todo: for requiretls, should an MTA-STS policy in mode testing be treated as good enough for requiretls? let's be strict and assume not.
577 // todo: ../rfc/8689:276 seems to specify stricter requirements on name in certificate than DANE (which allows original recipient domain name and cname-expanded name, and hints at following CNAME for MX targets as well, allowing both their original and expanded names too). perhaps the intent was just to say the name must be validated according to the relevant specifications?
578 // todo: for requiretls, should we allow no usable dane records with requiretls? dane allows it, but doesn't seem in spirit of requiretls, so not allowing it.
579 if err == nil && m0.RequireTLS != nil && *m0.RequireTLS && !(tlsDANE && len(daneRecords) > 0) && !enforceMTASTS {
580 log.Info("verified tls is required, but destination has no usable dane records and no mta-sts policy, canceling delivery attempt to host")
581 metricRequireTLSUnsupported.WithLabelValues("nopolicy").Inc()
582 // Resond with proper enhanced status code. ../rfc/8689:301
583 smtpErr := smtpclient.Error{
584 Code: smtp.C554TransactionFailed,
585 Secode: smtp.SePol7MissingReqTLS30,
586 Err: fmt.Errorf("missing required tls verification mechanism"),
587 }
588 return deliverResult{err: smtpErr}
589 }
590
591 // Dial the remote host given the IPs if no error yet.
592 var conn net.Conn
593 if err == nil {
594 connectionCounter.Add(1)
595 conn, remoteIP, err = smtpclient.Dial(ctx, log.Logger, dialer, host, ips, 25, m0.DialedIPs, mox.Conf.Static.SpecifiedSMTPListenIPs)
596 }
597 cancel()
598
599 // Set error for metrics.
600 var dialResult string
601 switch {
602 case err == nil:
603 dialResult = "ok"
604 case errors.Is(err, os.ErrDeadlineExceeded), errors.Is(err, context.DeadlineExceeded):
605 dialResult = "timeout"
606 case errors.Is(err, context.Canceled):
607 dialResult = "canceled"
608 default:
609 dialResult = "error"
610 }
611 metricConnection.WithLabelValues(dialResult).Inc()
612 if err != nil {
613 log.Debugx("connecting to remote smtp", err, slog.Any("host", host))
614 return deliverResult{err: fmt.Errorf("dialing smtp server: %v", err)}
615 }
616
617 var mailFrom string
618 if m0.SenderLocalpart != "" || !m0.SenderDomain.IsZero() {
619 mailFrom = m0.Sender().XString(m0.SMTPUTF8)
620 }
621
622 // todo future: get closer to timeouts specified in rfc? ../rfc/5321:3610
623 log = log.With(slog.Any("remoteip", remoteIP))
624 ctx, cancel = context.WithTimeout(mox.Shutdown, 30*time.Minute)
625 defer cancel()
626 mox.Connections.Register(conn, "smtpclient", "queue")
627
628 // Initialize SMTP session, sending EHLO/HELO and STARTTLS with specified tls mode.
629 var firstHost dns.Domain
630 var moreHosts []dns.Domain
631 if len(tlsHostnames) > 0 {
632 // For use with DANE-TA.
633 firstHost = tlsHostnames[0]
634 moreHosts = tlsHostnames[1:]
635 }
636 var verifiedRecord adns.TLSA
637 opts := smtpclient.Opts{
638 IgnoreTLSVerifyErrors: tlsRequiredNo,
639 RootCAs: mox.Conf.Static.TLS.CertPool,
640 DANERecords: daneRecords,
641 DANEMoreHostnames: moreHosts,
642 DANEVerifiedRecord: &verifiedRecord,
643 RecipientDomainResult: recipientDomainResult,
644 HostResult: &hostResult,
645 }
646 sc, err := smtpclient.New(ctx, log.Logger, conn, tlsMode, tlsPKIX, ourHostname, firstHost, opts)
647 defer func() {
648 if sc == nil {
649 err := conn.Close()
650 log.Check(err, "closing smtp tcp connection")
651 } else {
652 err := sc.Close()
653 log.Check(err, "closing smtp connection")
654 }
655 mox.Connections.Unregister(conn)
656 }()
657 if err == nil && m0.SenderAccount != "" {
658 // Remember the STARTTLS and REQUIRETLS support for this recipient domain.
659 // It is used in the webmail client, to show the recipient domain security mechanisms.
660 // We always save only the last connection we actually encountered. There may be
661 // multiple MX hosts, perhaps only some support STARTTLS and REQUIRETLS. We may not
662 // be accurate for the whole domain, but we're only storing a hint.
663 rdt := store.RecipientDomainTLS{
664 Domain: m0.RecipientDomain.Domain.Name(),
665 STARTTLS: sc.TLSConnectionState() != nil,
666 RequireTLS: sc.SupportsRequireTLS(),
667 }
668 if err = updateRecipientDomainTLS(ctx, log, m0.SenderAccount, rdt); err != nil {
669 err = fmt.Errorf("storing recipient domain tls status: %w", err)
670 }
671 }
672
673 inspectError := func(err error) error {
674 if cerr, ok := err.(smtpclient.Error); ok {
675 // If we are being rejected due to policy reasons on the first
676 // attempt and remote has both IPv4 and IPv6, we'll give it
677 // another try. Our first IP may be in a block list, the address for
678 // the other family perhaps is not.
679
680 if cerr.Permanent && m0.Attempts == 1 && dualstack && strings.HasPrefix(cerr.Secode, "7.") {
681 log.Debugx("change error type from permanent to transient", err, slog.Any("host", host), slog.Any("secode", cerr.Secode))
682 cerr.Permanent = false
683 }
684 // If server does not implement requiretls, respond with that code. ../rfc/8689:301
685 if errors.Is(cerr.Err, smtpclient.ErrRequireTLSUnsupported) {
686 cerr.Secode = smtp.SePol7MissingReqTLS30
687 metricRequireTLSUnsupported.WithLabelValues("norequiretls").Inc()
688 }
689 return cerr
690 }
691 return err
692 }
693
694 if err != nil {
695 return deliverResult{err: inspectError(err)}
696 }
697
698 // SMTP session is ready. Finally try to actually deliver.
699 has8bit := m0.Has8bit
700 smtputf8 := m0.SMTPUTF8
701 var msg io.Reader = msgr
702 resetReader := msgr.Reset
703 size := m0.Size
704 if m0.DSNUTF8 != nil && sc.Supports8BITMIME() && sc.SupportsSMTPUTF8() {
705 has8bit = true
706 smtputf8 = true
707 size = int64(len(m0.DSNUTF8))
708 msg = bytes.NewReader(m0.DSNUTF8)
709 resetReader = func() {
710 msg = bytes.NewReader(m0.DSNUTF8)
711 }
712 }
713
714 // Try to deliver messages. We'll do multiple transactions if the smtp server responds
715 // with "too many recipients".
716 todo := msgResps
717 var delivered, failed []*msgResp
718 for len(todo) > 0 {
719 resetReader()
720
721 // SMTP server may limit number of recipients in single transaction.
722 n := len(todo)
723 if sc.ExtLimitRcptMax > 0 && sc.ExtLimitRcptMax < len(todo) {
724 n = sc.ExtLimitRcptMax
725 }
726
727 rcpts := make([]string, n)
728 for i, mr := range todo[:n] {
729 rcpts[i] = mr.msg.Recipient().XString(m0.SMTPUTF8)
730 }
731
732 // Only require that remote announces 8bitmime extension when in pedantic mode. All
733 // relevant systems nowadays should accept "8-bit" messages, some unfortunately
734 // don't announce support. In theory we could rewrite the submitted message to be
735 // 7-bit-only, but the trouble likely isn't worth it.
736 req8bit := has8bit && mox.Pedantic
737
738 resps, err := sc.DeliverMultiple(ctx, mailFrom, rcpts, size, msg, req8bit, smtputf8, m0.RequireTLS != nil && *m0.RequireTLS)
739 if err != nil && (len(resps) == 0 && n == len(msgResps) || len(resps) == len(msgResps)) {
740 // If error and it applies to all recipients, return a single error.
741 return deliverResult{err: inspectError(err)}
742 }
743 var ntodo []*msgResp
744 for i, mr := range todo[:n] {
745 if err != nil {
746 if cerr, ok := err.(smtpclient.Error); ok {
747 mr.resp = smtpclient.Response(cerr)
748 } else {
749 mr.resp = smtpclient.Response{Err: err}
750 }
751 failed = append(failed, mr)
752 } else if i > 0 && (resps[i].Code == smtp.C452StorageFull || resps[i].Code == smtp.C552MailboxFull) {
753 ntodo = append(ntodo, mr)
754 } else if resps[i].Code == smtp.C250Completed {
755 mr.resp = resps[i]
756 delivered = append(delivered, mr)
757 } else {
758 failed = append(failed, mr)
759 }
760 }
761 todo = append(ntodo, todo[n:]...)
762
763 // We don't take LIMITS MAILMAX into account. Multiple MAIL commands are normal in
764 // SMTP. If the server doesn't support that, it will likely return a temporary
765 // error. So at least we'll try again. This would be quite unusual. And wasteful,
766 // because we would immediately dial again, do the TLS handshake, EHLO, etc. Let's
767 // implement such a limit when we see it in practice.
768 }
769
770 return deliverResult{delivered: delivered, failed: failed}
771}
772
773// Update (overwite) last known starttls/requiretls support for recipient domain.
774func updateRecipientDomainTLS(ctx context.Context, log mlog.Log, senderAccount string, rdt store.RecipientDomainTLS) error {
775 acc, err := store.OpenAccount(log, senderAccount, false)
776 if err != nil {
777 return fmt.Errorf("open account: %w", err)
778 }
779 defer func() {
780 err := acc.Close()
781 log.Check(err, "closing account")
782 }()
783 err = acc.DB.Write(ctx, func(tx *bstore.Tx) error {
784 // First delete any existing record.
785 if err := tx.Delete(&store.RecipientDomainTLS{Domain: rdt.Domain}); err != nil && err != bstore.ErrAbsent {
786 return fmt.Errorf("removing previous recipient domain tls status: %w", err)
787 }
788 // Insert new record.
789 return tx.Insert(&rdt)
790 })
791 if err != nil {
792 return fmt.Errorf("adding recipient domain tls status to account database: %w", err)
793 }
794 return nil
795}
796