1package queue
2
3import (
4 "bytes"
5 "context"
6 "errors"
7 "fmt"
8 "io"
9 "log/slog"
10 "net"
11 "os"
12 "strings"
13 "sync/atomic"
14 "time"
15
16 "github.com/prometheus/client_golang/prometheus"
17 "github.com/prometheus/client_golang/prometheus/promauto"
18
19 "github.com/mjl-/adns"
20 "github.com/mjl-/bstore"
21
22 "github.com/mjl-/mox/dns"
23 "github.com/mjl-/mox/dsn"
24 "github.com/mjl-/mox/mlog"
25 "github.com/mjl-/mox/mox-"
26 "github.com/mjl-/mox/mtasts"
27 "github.com/mjl-/mox/mtastsdb"
28 "github.com/mjl-/mox/smtp"
29 "github.com/mjl-/mox/smtpclient"
30 "github.com/mjl-/mox/store"
31 "github.com/mjl-/mox/tlsrpt"
32)
33
// connectionCounter is increased each time an outgoing connection is about to
// be made for direct delivery (just before dialing in deliverHost). It is used by
// dnsbl monitoring to pace querying, and is exposed through ConnectionCounter.
var connectionCounter atomic.Int64
37
38var (
39 metricDestinations = promauto.NewCounter(
40 prometheus.CounterOpts{
41 Name: "mox_queue_destinations_total",
42 Help: "Total destination (e.g. MX) lookups for delivery attempts, including those in mox_smtpclient_destinations_authentic_total.",
43 },
44 )
45 metricDestinationsAuthentic = promauto.NewCounter(
46 prometheus.CounterOpts{
47 Name: "mox_queue_destinations_authentic_total",
48 Help: "Destination (e.g. MX) lookups for delivery attempts authenticated with DNSSEC so they are candidates for DANE verification.",
49 },
50 )
51 metricDestinationDANERequired = promauto.NewCounter(
52 prometheus.CounterOpts{
53 Name: "mox_queue_destination_dane_required_total",
54 Help: "Total number of connections to hosts with valid TLSA records making DANE required.",
55 },
56 )
57 metricDestinationDANESTARTTLSUnverified = promauto.NewCounter(
58 prometheus.CounterOpts{
59 Name: "mox_queue_destination_dane_starttlsunverified_total",
60 Help: "Total number of connections with required DANE where all TLSA records were unusable.",
61 },
62 )
63 metricDestinationDANEGatherTLSAErrors = promauto.NewCounter(
64 prometheus.CounterOpts{
65 Name: "mox_queue_destination_dane_gathertlsa_errors_total",
66 Help: "Total number of connections where looking up TLSA records resulted in an error.",
67 },
68 )
69 // todo: recognize when "tls-required-no" message header caused a non-verifying certificate to be overridden. requires doing our own certificate validation after having set tls.Config.InsecureSkipVerify due to tls-required-no.
70 metricTLSRequiredNoIgnored = promauto.NewCounterVec(
71 prometheus.CounterOpts{
72 Name: "mox_queue_tlsrequiredno_ignored_total",
73 Help: "Delivery attempts with TLS policy findings ignored due to message with TLS-Required: No header. Does not cover case where TLS certificate cannot be PKIX-verified.",
74 },
75 []string{
76 "ignored", // mtastspolicy (error getting policy), mtastsmx (mx host not allowed in policy), badtls (error negotiating tls), badtlsa (error fetching dane tlsa records)
77 },
78 )
79 metricRequireTLSUnsupported = promauto.NewCounterVec(
80 prometheus.CounterOpts{
81 Name: "mox_queue_requiretls_unsupported_total",
82 Help: "Delivery attempts that failed due to message with REQUIRETLS.",
83 },
84 []string{
85 "reason", // nopolicy (no mta-sts and no dane), norequiretls (smtp server does not support requiretls)
86 },
87 )
88 metricPlaintextFallback = promauto.NewCounter(
89 prometheus.CounterOpts{
90 Name: "mox_queue_plaintext_fallback_total",
91 Help: "Delivery attempts with fallback to plain text delivery.",
92 },
93 )
94)
95
// ConnectionCounter returns the current value of the counter of outgoing
// connections made for direct delivery.
func ConnectionCounter() int64 {
	return connectionCounter.Load()
}
99
// msgResp pairs a message to deliver with the SMTP response for its recipient.
type msgResp struct {
	msg  *Msg                // Message (single recipient) to deliver.
	resp smtpclient.Response // Response for the recipient; converted to an smtpclient.Error when the message is reported as failed.
}
104
105// Delivery by directly dialing (MX) hosts for destination domain of message.
106//
107// The returned results are for use in a TLSRPT report, it holds success/failure
108// counts and failure details for delivery/connection attempts. The
109// recipientDomainResult is for policies/counts/failures about the whole recipient
110// domain (MTA-STS), its policy type can be empty, in which case there is no
111// information (e.g. internal failure). hostResults are per-host details (DANE, one
112// per MX target).
113func deliverDirect(qlog mlog.Log, resolver dns.Resolver, dialer smtpclient.Dialer, ourHostname dns.Domain, transportName string, msgs []*Msg, backoff time.Duration) (recipientDomainResult tlsrpt.Result, hostResults []tlsrpt.Result) {
114 // High-level approach:
115 // - Resolve domain to deliver to (CNAME), and determine hosts to try to deliver to (MX)
116 // - Get MTA-STS policy for domain (optional). If present, only deliver to its
117 // allowlisted hosts and verify TLS against CA pool.
118 // - For each host, attempt delivery. If the attempt results in a permanent failure
119 // (as claimed by remote with a 5xx SMTP response, or perhaps decided by us), the
120 // attempt can be aborted. Other errors are often temporary and may result in later
121 // successful delivery. But hopefully the delivery just succeeds. For each host:
122 // - If there is an MTA-STS policy, we only connect to allow-listed hosts.
123 // - We try to lookup DANE records (optional) and verify them if present.
124 // - If RequireTLS is true, we only deliver if the remote SMTP server implements it.
125 // - If RequireTLS is false, we'll fall back to regular delivery attempts without
126 // TLS verification and possibly without TLS at all, ignoring recipient domain/host
127 // MTA-STS and DANE policies.
128
129 // For convenience, we use m0 to access properties that are shared over all
130 // messages we are delivering.
131 m0 := msgs[0]
132
133 // Resolve domain and hosts to attempt delivery to.
134 // These next-hop names are often the name under which we find MX records. The
135 // expanded name is different from the original if the original was a CNAME,
136 // possibly a chain. If there are no MX records, it can be an IP or the host
137 // directly.
138 origNextHop := m0.RecipientDomain.Domain
139 ctx := mox.Shutdown
140 haveMX, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, hosts, permanent, err := smtpclient.GatherDestinations(ctx, qlog.Logger, resolver, m0.RecipientDomain)
141 if err != nil {
142 // If this is a DNSSEC authentication error, we'll collect it for TLS reporting.
143 // Hopefully it's a temporary misconfiguration that is solve before we try to send
144 // our report. We don't report as "dnssec-invalid", because that is defined as
145 // being for DANE. ../rfc/8460:580
146 var errCode adns.ErrorCode
147 if errors.As(err, &errCode) && errCode.IsAuthentication() {
148 // Result: ../rfc/8460:567
149 reasonCode := fmt.Sprintf("dns-extended-error-%d-%s", errCode, strings.ReplaceAll(errCode.String(), " ", "-"))
150 fd := tlsrpt.Details(tlsrpt.ResultValidationFailure, reasonCode)
151 recipientDomainResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, origNextHop, fd)
152 recipientDomainResult.Summary.TotalFailureSessionCount++
153 }
154 if permanent {
155 err = smtpclient.Error{Permanent: true, Err: err}
156 }
157 fail(ctx, qlog, msgs, m0.DialedIPs, backoff, dsn.NameIP{}, err)
158 return
159 }
160
161 tlsRequiredNo := m0.RequireTLS != nil && !*m0.RequireTLS
162
163 // Check for MTA-STS policy and enforce it if needed.
164 // We must check at the original next-hop, i.e. recipient domain, not following any
165 // CNAMEs. If we were to follow CNAMEs and ask for MTA-STS at that domain, it
166 // would only take a single CNAME DNS response to direct us to an unrelated domain.
167 var policy *mtasts.Policy // Policy can have mode enforce, testing and none.
168 if !origNextHop.IsZero() {
169 policy, recipientDomainResult, _, err = mtastsdb.Get(ctx, qlog.Logger, resolver, origNextHop)
170 if err != nil {
171 if tlsRequiredNo {
172 qlog.Infox("mtasts lookup temporary error, continuing due to tls-required-no message header", err, slog.Any("domain", origNextHop))
173 metricTLSRequiredNoIgnored.WithLabelValues("mtastspolicy").Inc()
174 } else {
175 qlog.Infox("mtasts lookup temporary error, aborting delivery attempt", err, slog.Any("domain", origNextHop))
176 recipientDomainResult.Summary.TotalFailureSessionCount++
177 fail(ctx, qlog, msgs, m0.DialedIPs, backoff, dsn.NameIP{}, err)
178 return
179 }
180 }
181 // note: policy can be nil, if a domain does not implement MTA-STS or it's the
182 // first time we fetch the policy and if we encountered an error.
183 }
184
185 // We try delivery to each host until we have success or a permanent failure. So
186 // for transient errors, we'll try the next host. For MX records pointing to a
187 // dual stack host, we turn a permanent failure due to policy on the first delivery
188 // attempt into a temporary failure and make sure to try the other address family
189 // the next attempt. This should reduce issues due to one of our IPs being on a
190 // block list. We won't try multiple IPs of the same address family. Surprisingly,
191 // RFC 5321 does not specify a clear algorithm, but common practice is probably
192 // ../rfc/3974:268.
193 var remoteMTA dsn.NameIP
194 var lastErr = errors.New("no error") // Can be smtpclient.Error.
195 nmissingRequireTLS := 0
196 // todo: should make distinction between host permanently not accepting the message, and the message not being deliverable permanently. e.g. a mx host may have a size limit, or not accept 8bitmime, while another host in the list does accept the message. same for smtputf8, ../rfc/6531:555
197 for _, h := range hosts {
198 // ../rfc/8461:913
199 if policy != nil && policy.Mode != mtasts.ModeNone && !policy.Matches(h.Domain) {
200 // todo: perhaps only send tlsrpt failure if none of the mx hosts matched? reporting about each mismatch seems useful for domain owners, to discover mtasts policies they didn't update after changing mx. there is a risk a domain owner intentionally didn't put all mx'es in the mtasts policy, but they probably won't mind being reported about that.
201 // Other error: Surprising that TLSRPT doesn't have an MTA-STS specific error code
202 // for this case, it's a big part of the reason to have MTA-STS. ../rfc/8460:610
203 // Result: ../rfc/8460:567 todo spec: propose adding a result for this case?
204 fd := tlsrpt.Details(tlsrpt.ResultValidationFailure, "mtasts-policy-mx-mismatch")
205 fd.ReceivingMXHostname = h.Domain.ASCII
206 recipientDomainResult.Add(0, 0, fd)
207
208 var policyHosts []string
209 for _, mx := range policy.MX {
210 policyHosts = append(policyHosts, mx.LogString())
211 }
212 if policy.Mode == mtasts.ModeEnforce {
213 if tlsRequiredNo {
214 qlog.Info("mx host does not match mta-sts policy in mode enforce, ignoring due to tls-required-no message header", slog.Any("host", h.Domain), slog.Any("policyhosts", policyHosts))
215 metricTLSRequiredNoIgnored.WithLabelValues("mtastsmx").Inc()
216 } else {
217 lastErr = fmt.Errorf("mx host %s does not match enforced mta-sts policy with hosts %s", h.Domain, strings.Join(policyHosts, ","))
218 qlog.Error("mx host does not match mta-sts policy in mode enforce, skipping", slog.Any("host", h.Domain), slog.Any("policyhosts", policyHosts))
219 recipientDomainResult.Summary.TotalFailureSessionCount++
220 continue
221 }
222 } else {
223 qlog.Error("mx host does not match mta-sts policy, but it is not enforced, continuing", slog.Any("host", h.Domain), slog.Any("policyhosts", policyHosts))
224 }
225 }
226
227 qlog.Info("delivering to remote", slog.Any("remote", h))
228 nqlog := qlog.WithCid(mox.Cid())
229 var remoteIP net.IP
230
231 enforceMTASTS := policy != nil && policy.Mode == mtasts.ModeEnforce
232 tlsMode := smtpclient.TLSOpportunistic
233 tlsPKIX := false
234 if enforceMTASTS {
235 tlsMode = smtpclient.TLSRequiredStartTLS
236 tlsPKIX = true
237 // note: smtpclient will still go through PKIX verification, and report about it, but not fail the connection if not passing.
238 }
239
240 // Try to deliver to host. We can get various errors back. Like permanent failure
241 // response codes, TCP, DNSSEC, TLS (opportunistic, i.e. optional with fallback to
242 // without), etc. It's a balancing act to handle these situations correctly. We
243 // don't want to bounce unnecessarily. But also not keep trying if there is no
244 // chance of success.
245 //
246 // deliverHost will report generic TLS and MTA-STS-specific failures in
247 // recipientDomainResult. If DANE is encountered, it will add a DANE reporting
248 // result for generic TLS and DANE-specific errors.
249
250 msgResps := make([]*msgResp, len(msgs))
251 for i := range msgs {
252 msgResps[i] = &msgResp{msg: msgs[i]}
253 }
254
255 result := deliverHost(nqlog, resolver, dialer, ourHostname, transportName, h, enforceMTASTS, haveMX, origNextHopAuthentic, origNextHop, expandedNextHopAuthentic, expandedNextHop, msgResps, tlsMode, tlsPKIX, &recipientDomainResult)
256
257 var zerotype tlsrpt.PolicyType
258 if result.hostResult.Policy.Type != zerotype {
259 hostResults = append(hostResults, result.hostResult)
260 }
261
262 // If we had a TLS-related failure when doing TLS, and we don't have a requirement
263 // for MTA-STS/DANE, we try again without TLS. This could be an old server that
264 // only does ancient TLS versions, or has a misconfiguration. Note that
265 // opportunistic TLS does not do regular certificate verification, so that can't be
266 // the problem.
267 // ../rfc/7435:459
268 // We don't fall back to plain text for DMARC reports. ../rfc/7489:1768 ../rfc/7489:2683
269 // We queue outgoing TLS reports with tlsRequiredNo, so reports can be delivered in
270 // case of broken TLS.
271 if result.err != nil && errors.Is(result.err, smtpclient.ErrTLS) && (!enforceMTASTS && tlsMode == smtpclient.TLSOpportunistic && !result.tlsDANE && !m0.IsDMARCReport || tlsRequiredNo) {
272 metricPlaintextFallback.Inc()
273 if tlsRequiredNo {
274 metricTLSRequiredNoIgnored.WithLabelValues("badtls").Inc()
275 }
276
277 // todo future: add a configuration option to not fall back?
278 nqlog.Info("connecting again for delivery attempt without tls",
279 slog.Bool("enforcemtasts", enforceMTASTS),
280 slog.Bool("tlsdane", result.tlsDANE),
281 slog.Any("requiretls", m0.RequireTLS))
282 result = deliverHost(nqlog, resolver, dialer, ourHostname, transportName, h, enforceMTASTS, haveMX, origNextHopAuthentic, origNextHop, expandedNextHopAuthentic, expandedNextHop, msgResps, smtpclient.TLSSkip, false, &tlsrpt.Result{})
283 }
284
285 remoteMTA = dsn.NameIP{Name: h.XString(false), IP: remoteIP}
286 if result.err != nil {
287 lastErr = result.err
288 var cerr smtpclient.Error
289 if errors.As(result.err, &cerr) {
290 if cerr.Secode == smtp.SePol7MissingReqTLS30 {
291 nmissingRequireTLS++
292 }
293 if cerr.Permanent {
294 break
295 }
296 }
297 continue
298 }
299
300 delIDs := make([]int64, len(result.delivered))
301 for i, mr := range result.delivered {
302 mqlog := nqlog.With(slog.Int64("msgid", mr.msg.ID), slog.Any("recipient", mr.msg.Recipient()))
303 mqlog.Info("delivered from queue")
304 delIDs[i] = mr.msg.ID
305 }
306 if len(delIDs) > 0 {
307 if err := queueDelete(context.Background(), delIDs...); err != nil {
308 nqlog.Errorx("deleting messages from queue after delivery", err)
309 }
310 }
311 for _, mr := range result.failed {
312 fail(ctx, nqlog, []*Msg{mr.msg}, m0.DialedIPs, backoff, remoteMTA, smtpclient.Error(mr.resp))
313 }
314 return
315 }
316
317 // In theory, we could make a failure permanent if we didn't find any mx host
318 // matching the mta-sts policy AND the policy is fresh AND all DNS records leading
319 // to the MX targets (including CNAME) have a TTL that is beyond the latest
320 // possible delivery attempt. Until that time, configuration problems can be
321 // corrected through DNS or policy update. Not sure if worth it in practice, there
322 // is a good chance the MX records can still change, at least on initial delivery
323 // failures.
324 // todo: possibly detect that future deliveries will fail due to long ttl's of cached records that are preventing delivery.
325
326 // If we failed due to requiretls not being satisfied, make the delivery permanent.
327 // It is unlikely the recipient domain will implement requiretls during our retry
328 // period. Best to let the sender know immediately.
329 if len(hosts) > 0 && nmissingRequireTLS == len(hosts) {
330 qlog.Info("marking delivery as permanently failed because recipient domain does not implement requiretls")
331 err := smtpclient.Error{
332 Permanent: true,
333 Code: smtp.C554TransactionFailed,
334 Secode: smtp.SePol7MissingReqTLS30,
335 Err: fmt.Errorf("destination servers do not support requiretls"),
336 }
337 fail(ctx, qlog, msgs, m0.DialedIPs, backoff, remoteMTA, err)
338 return
339 }
340
341 fail(ctx, qlog, msgs, m0.DialedIPs, backoff, remoteMTA, lastErr)
342 return
343}
344
// deliverResult is the outcome of a single deliverHost call.
type deliverResult struct {
	tlsDANE    bool          // As returned by smtpclient.GatherTLSA for the host; false if TLSA records were not looked up.
	remoteIP   net.IP        // IP returned by smtpclient.Dial, nil if we did not get to dialing.
	hostResult tlsrpt.Result // TLSRPT result for the host. Its policy type can be the zero value.

	// If err is set, no messages were delivered and delivered and failed are still
	// nil. If err is not set, delivered and failed always add up to all msgs
	// requested to be sent. All messages can be in failed.
	delivered []*msgResp
	failed    []*msgResp
	err       error
}
357
358// deliverHost attempts to deliver msgs to host. All msgs must have the same
359// delivery requirements (e.g. requiretls). Depending on tlsMode we'll do
360// opportunistic or required STARTTLS or skip TLS entirely. Based on tlsPKIX we do
361// PKIX/WebPKI verification (for MTA-STS). If we encounter DANE records, we verify
362// those. If the message has a message header "TLS-Required: No", we ignore TLS
363// verification errors.
364//
365// deliverHost updates DialedIPs of msgs, which must be saved in case of failure to
366// deliver.
367//
368// The haveMX and next-hop-authentic fields are used to determine if DANE is
369// applicable. The next-hop fields themselves are used to determine valid names
370// during DANE TLS certificate verification.
371//
372// The returned hostResult holds TLSRPT reporting results for the connection
373// attempt. Its policy type can be the zero value, indicating there was no finding
374// (e.g. internal error).
375//
376// deliverHost may send a message multiple times: if the server doesn't accept
377// multiple recipients for a message.
378func deliverHost(log mlog.Log, resolver dns.Resolver, dialer smtpclient.Dialer, ourHostname dns.Domain, transportName string, host dns.IPDomain, enforceMTASTS, haveMX, origNextHopAuthentic bool, origNextHop dns.Domain, expandedNextHopAuthentic bool, expandedNextHop dns.Domain, msgResps []*msgResp, tlsMode smtpclient.TLSMode, tlsPKIX bool, recipientDomainResult *tlsrpt.Result) (result deliverResult) {
379 // About attempting delivery to multiple addresses of a host: ../rfc/5321:3898
380
381 m0 := msgResps[0].msg
382 tlsRequiredNo := m0.RequireTLS != nil && !*m0.RequireTLS
383
384 var tlsDANE bool
385 var remoteIP net.IP
386 var hostResult tlsrpt.Result
387 start := time.Now()
388 defer func() {
389 result.tlsDANE = tlsDANE
390 result.remoteIP = remoteIP
391 result.hostResult = hostResult
392
393 mode := string(tlsMode)
394 if tlsPKIX {
395 mode += "+mtasts"
396 }
397 if tlsDANE {
398 mode += "+dane"
399 }
400
401 r := deliveryResult(result.err, len(result.delivered), len(result.failed))
402 d := float64(time.Since(start)) / float64(time.Second)
403 metricDelivery.WithLabelValues(fmt.Sprintf("%d", m0.Attempts), transportName, mode, r).Observe(d)
404
405 log.Debugx("queue deliverhost result", result.err,
406 slog.Any("host", host),
407 slog.Int("attempt", m0.Attempts),
408 slog.String("result", r),
409 slog.Int("delivered", len(result.delivered)),
410 slog.Int("failed", len(result.failed)),
411 slog.Any("tlsmode", tlsMode),
412 slog.Bool("tlspkix", tlsPKIX),
413 slog.Bool("tlsdane", tlsDANE),
414 slog.Bool("tlsrequiredno", tlsRequiredNo),
415 slog.Bool("badtls", result.err != nil && errors.Is(result.err, smtpclient.ErrTLS)),
416 slog.Duration("duration", time.Since(start)))
417 }()
418
419 // Open message to deliver.
420 f, err := os.Open(m0.MessagePath())
421 if err != nil {
422 return deliverResult{err: fmt.Errorf("open message file: %v", err)}
423 }
424 msgr := store.FileMsgReader(m0.MsgPrefix, f)
425 defer func() {
426 err := msgr.Close()
427 log.Check(err, "closing message after delivery attempt")
428 }()
429
430 ctx, cancel := context.WithTimeout(mox.Shutdown, 30*time.Second)
431 defer cancel()
432
433 // We must lookup the IPs for the host name before checking DANE TLSA records. And
434 // only check TLSA records for secure responses. This prevents problems with old
435 // name servers returning an error for TLSA requests or letting it timeout (not
436 // sending a response). ../rfc/7672:879
437 var daneRecords []adns.TLSA
438 var tlsHostnames []dns.Domain
439 if host.IsDomain() {
440 tlsHostnames = []dns.Domain{host.Domain}
441 }
442 for _, mr := range msgResps {
443 if mr.msg.DialedIPs == nil {
444 mr.msg.DialedIPs = map[string][]net.IP{}
445 }
446 }
447
448 countResultFailure := func() {
449 recipientDomainResult.Summary.TotalFailureSessionCount++
450 hostResult.Summary.TotalFailureSessionCount++
451 }
452
453 metricDestinations.Inc()
454 authentic, expandedAuthentic, expandedHost, ips, dualstack, err := smtpclient.GatherIPs(ctx, log.Logger, resolver, host, m0.DialedIPs)
455 destAuthentic := err == nil && authentic && origNextHopAuthentic && (!haveMX || expandedNextHopAuthentic) && host.IsDomain()
456 if !destAuthentic {
457 log.Debugx("not attempting verification with dane", err, slog.Bool("authentic", authentic), slog.Bool("expandedauthentic", expandedAuthentic))
458
459 // Track a DNSSEC error if found.
460 var errCode adns.ErrorCode
461 if err != nil {
462 if errors.As(err, &errCode) && errCode.IsAuthentication() {
463 // Result: ../rfc/8460:567
464 reasonCode := fmt.Sprintf("dns-extended-error-%d-%s", errCode, strings.ReplaceAll(errCode.String(), " ", "-"))
465 fd := tlsrpt.Details(tlsrpt.ResultValidationFailure, reasonCode)
466 hostResult = tlsrpt.MakeResult(tlsrpt.TLSA, host.Domain, fd)
467 countResultFailure()
468 }
469 } else {
470 // todo: we could lookup tlsa records, and log an error when they are not dnssec-signed. this should be interpreted simply as "not doing dane", but it could be useful to warn domain owners about, they may be under the impression they are dane-protected.
471 hostResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, host.Domain)
472 }
473 } else if tlsMode == smtpclient.TLSSkip {
474 metricDestinationsAuthentic.Inc()
475
476 // TLSSkip is used to fallback to plaintext, which is used with a TLS-Required: No
477 // header to ignore the recipient domain's DANE policy.
478
479 // possible err is propagated to below.
480 } else {
481 metricDestinationsAuthentic.Inc()
482
483 // Look for TLSA records in either the expandedHost, or otherwise the original
484 // host. ../rfc/7672:912
485 var tlsaBaseDomain dns.Domain
486 tlsDANE, daneRecords, tlsaBaseDomain, err = smtpclient.GatherTLSA(ctx, log.Logger, resolver, host.Domain, expandedNextHopAuthentic && expandedAuthentic, expandedHost)
487 if tlsDANE {
488 metricDestinationDANERequired.Inc()
489 }
490 if err != nil {
491 metricDestinationDANEGatherTLSAErrors.Inc()
492 }
493 if err == nil && tlsDANE {
494 tlsMode = smtpclient.TLSRequiredStartTLS
495 hostResult = tlsrpt.Result{Policy: tlsrpt.TLSAPolicy(daneRecords, tlsaBaseDomain)}
496 if len(daneRecords) == 0 {
497 // If there are no usable DANE records, we still have to use TLS, but without
498 // verifying its certificate. At least when there is no MTA-STS. Why? Perhaps to
499 // prevent ossification? The SMTP TLSA specification has different behaviour than
500 // the generic TLSA. "Usable" means different things in different places.
501 // ../rfc/7672:718 ../rfc/6698:1845 ../rfc/6698:660
502 log.Debug("no usable dane records, requiring starttls but not verifying with dane")
503 metricDestinationDANESTARTTLSUnverified.Inc()
504 daneRecords = nil
505 // Result: ../rfc/8460:576 (this isn't technicall invalid, only all-unusable...)
506 hostResult.FailureDetails = []tlsrpt.FailureDetails{
507 {
508 ResultType: tlsrpt.ResultTLSAInvalid,
509 ReceivingMXHostname: host.XString(false),
510 FailureReasonCode: "all-unusable-records+ignored",
511 },
512 }
513 } else {
514 log.Debug("delivery with required starttls with dane verification", slog.Any("allowedtlshostnames", tlsHostnames))
515 }
516 // Based on CNAMEs followed and DNSSEC-secure status, we must allow up to 4 host
517 // names.
518 tlsHostnames = smtpclient.GatherTLSANames(haveMX, expandedNextHopAuthentic, expandedAuthentic, origNextHop, expandedNextHop, host.Domain, tlsaBaseDomain)
519 } else if !tlsDANE {
520 log.Debugx("not doing opportunistic dane after gathering tlsa records", err)
521 err = nil
522 hostResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, tlsaBaseDomain)
523 } else if err != nil {
524 fd := tlsrpt.Details(tlsrpt.ResultTLSAInvalid, "")
525 var errCode adns.ErrorCode
526 if errors.As(err, &errCode) {
527 fd.FailureReasonCode = fmt.Sprintf("extended-dns-error-%d-%s", errCode, strings.ReplaceAll(errCode.String(), " ", "-"))
528 if errCode.IsAuthentication() {
529 // Result: ../rfc/8460:580
530 fd.ResultType = tlsrpt.ResultDNSSECInvalid
531 countResultFailure()
532 }
533 }
534 hostResult = tlsrpt.Result{
535 Policy: tlsrpt.TLSAPolicy(daneRecords, tlsaBaseDomain),
536 FailureDetails: []tlsrpt.FailureDetails{fd},
537 }
538
539 if tlsRequiredNo {
540 log.Debugx("error gathering dane tlsa records with dane required, but continuing without validation due to tls-required-no message header", err)
541 err = nil
542 metricTLSRequiredNoIgnored.WithLabelValues("badtlsa").Inc()
543 }
544 }
545 // else, err is propagated below.
546 }
547
548 // todo: for requiretls, should an MTA-STS policy in mode testing be treated as good enough for requiretls? let's be strict and assume not.
549 // todo: ../rfc/8689:276 seems to specify stricter requirements on name in certificate than DANE (which allows original recipient domain name and cname-expanded name, and hints at following CNAME for MX targets as well, allowing both their original and expanded names too). perhaps the intent was just to say the name must be validated according to the relevant specifications?
550 // todo: for requiretls, should we allow no usable dane records with requiretls? dane allows it, but doesn't seem in spirit of requiretls, so not allowing it.
551 if err == nil && m0.RequireTLS != nil && *m0.RequireTLS && !(tlsDANE && len(daneRecords) > 0) && !enforceMTASTS {
552 log.Info("verified tls is required, but destination has no usable dane records and no mta-sts policy, canceling delivery attempt to host")
553 metricRequireTLSUnsupported.WithLabelValues("nopolicy").Inc()
554 // Resond with proper enhanced status code. ../rfc/8689:301
555 smtpErr := smtpclient.Error{
556 Code: smtp.C554TransactionFailed,
557 Secode: smtp.SePol7MissingReqTLS30,
558 Err: fmt.Errorf("missing required tls verification mechanism"),
559 }
560 return deliverResult{err: smtpErr}
561 }
562
563 // Dial the remote host given the IPs if no error yet.
564 var conn net.Conn
565 if err == nil {
566 connectionCounter.Add(1)
567 conn, remoteIP, err = smtpclient.Dial(ctx, log.Logger, dialer, host, ips, 25, m0.DialedIPs, mox.Conf.Static.SpecifiedSMTPListenIPs)
568 }
569 cancel()
570
571 // Set error for metrics.
572 var dialResult string
573 switch {
574 case err == nil:
575 dialResult = "ok"
576 case errors.Is(err, os.ErrDeadlineExceeded), errors.Is(err, context.DeadlineExceeded):
577 dialResult = "timeout"
578 case errors.Is(err, context.Canceled):
579 dialResult = "canceled"
580 default:
581 dialResult = "error"
582 }
583 metricConnection.WithLabelValues(dialResult).Inc()
584 if err != nil {
585 log.Debugx("connecting to remote smtp", err, slog.Any("host", host))
586 return deliverResult{err: fmt.Errorf("dialing smtp server: %v", err)}
587 }
588
589 var mailFrom string
590 if m0.SenderLocalpart != "" || !m0.SenderDomain.IsZero() {
591 mailFrom = m0.Sender().XString(m0.SMTPUTF8)
592 }
593
594 // todo future: get closer to timeouts specified in rfc? ../rfc/5321:3610
595 log = log.With(slog.Any("remoteip", remoteIP))
596 ctx, cancel = context.WithTimeout(mox.Shutdown, 30*time.Minute)
597 defer cancel()
598 mox.Connections.Register(conn, "smtpclient", "queue")
599
600 // Initialize SMTP session, sending EHLO/HELO and STARTTLS with specified tls mode.
601 var firstHost dns.Domain
602 var moreHosts []dns.Domain
603 if len(tlsHostnames) > 0 {
604 // For use with DANE-TA.
605 firstHost = tlsHostnames[0]
606 moreHosts = tlsHostnames[1:]
607 }
608 var verifiedRecord adns.TLSA
609 opts := smtpclient.Opts{
610 IgnoreTLSVerifyErrors: tlsRequiredNo,
611 RootCAs: mox.Conf.Static.TLS.CertPool,
612 DANERecords: daneRecords,
613 DANEMoreHostnames: moreHosts,
614 DANEVerifiedRecord: &verifiedRecord,
615 RecipientDomainResult: recipientDomainResult,
616 HostResult: &hostResult,
617 }
618 sc, err := smtpclient.New(ctx, log.Logger, conn, tlsMode, tlsPKIX, ourHostname, firstHost, opts)
619 defer func() {
620 if sc == nil {
621 conn.Close()
622 } else {
623 sc.Close()
624 }
625 mox.Connections.Unregister(conn)
626 }()
627 if err == nil && m0.SenderAccount != "" {
628 // Remember the STARTTLS and REQUIRETLS support for this recipient domain.
629 // It is used in the webmail client, to show the recipient domain security mechanisms.
630 // We always save only the last connection we actually encountered. There may be
631 // multiple MX hosts, perhaps only some support STARTTLS and REQUIRETLS. We may not
632 // be accurate for the whole domain, but we're only storing a hint.
633 rdt := store.RecipientDomainTLS{
634 Domain: m0.RecipientDomain.Domain.Name(),
635 STARTTLS: sc.TLSConnectionState() != nil,
636 RequireTLS: sc.SupportsRequireTLS(),
637 }
638 if err = updateRecipientDomainTLS(ctx, log, m0.SenderAccount, rdt); err != nil {
639 err = fmt.Errorf("storing recipient domain tls status: %w", err)
640 }
641 }
642 if err != nil {
643 if cerr, ok := err.(smtpclient.Error); ok {
644 // If we are being rejected due to policy reasons on the first
645 // attempt and remote has both IPv4 and IPv6, we'll give it
646 // another try. Our first IP may be in a block list, the address for
647 // the other family perhaps is not.
648 if cerr.Permanent && m0.Attempts == 1 && dualstack && strings.HasPrefix(cerr.Secode, "7.") {
649 cerr.Permanent = false
650 }
651 // If server does not implement requiretls, respond with that code. ../rfc/8689:301
652 if errors.Is(cerr.Err, smtpclient.ErrRequireTLSUnsupported) {
653 cerr.Secode = smtp.SePol7MissingReqTLS30
654 metricRequireTLSUnsupported.WithLabelValues("norequiretls").Inc()
655 }
656 err = cerr
657 }
658 return deliverResult{err: err}
659 }
660
661 // SMTP session is ready. Finally try to actually deliver.
662 has8bit := m0.Has8bit
663 smtputf8 := m0.SMTPUTF8
664 var msg io.Reader = msgr
665 resetReader := msgr.Reset
666 size := m0.Size
667 if m0.DSNUTF8 != nil && sc.Supports8BITMIME() && sc.SupportsSMTPUTF8() {
668 has8bit = true
669 smtputf8 = true
670 size = int64(len(m0.DSNUTF8))
671 msg = bytes.NewReader(m0.DSNUTF8)
672 resetReader = func() {
673 msg = bytes.NewReader(m0.DSNUTF8)
674 }
675 }
676
677 // Try to deliver messages. We'll do multiple transactions if the smtp server responds
678 // with "too many recipients".
679 todo := msgResps
680 var delivered, failed []*msgResp
681 for len(todo) > 0 {
682 resetReader()
683
684 // SMTP server may limit number of recipients in single transaction.
685 n := len(todo)
686 if sc.ExtLimitRcptMax > 0 && sc.ExtLimitRcptMax < len(todo) {
687 n = sc.ExtLimitRcptMax
688 }
689
690 rcpts := make([]string, n)
691 for i, mr := range todo[:n] {
692 rcpts[i] = mr.msg.Recipient().XString(m0.SMTPUTF8)
693 }
694
695 resps, err := sc.DeliverMultiple(ctx, mailFrom, rcpts, size, msg, has8bit, smtputf8, m0.RequireTLS != nil && *m0.RequireTLS)
696 if err != nil && len(resps) == len(msgResps) {
697 // If error and it applies to all recipients, return a single error.
698 return deliverResult{err: err}
699 }
700 var ntodo []*msgResp
701 for i, mr := range todo[:n] {
702 if err != nil {
703 mr.resp = smtpclient.Response{Err: err}
704 failed = append(failed, mr)
705 } else if i > 0 && (resps[i].Code == smtp.C452StorageFull || resps[i].Code == smtp.C552MailboxFull) {
706 ntodo = append(ntodo, mr)
707 } else if resps[i].Code == smtp.C250Completed {
708 delivered = append(delivered, mr)
709 } else {
710 failed = append(failed, mr)
711 }
712 }
713 todo = append(ntodo, todo[n:]...)
714
715 // We don't take LIMITS MAILMAX into account. Multiple MAIL commands are normal in
716 // SMTP. If the server doesn't support that, it will likely return a temporary
717 // error. So at least we'll try again. This would be quite unusual. And wasteful,
718 // because we would immediately dial again, do the TLS handshake, EHLO, etc. Let's
719 // implement such a limit when we see it in practice.
720 }
721
722 return deliverResult{delivered: delivered, failed: failed}
723}
724
725// Update (overwite) last known starttls/requiretls support for recipient domain.
726func updateRecipientDomainTLS(ctx context.Context, log mlog.Log, senderAccount string, rdt store.RecipientDomainTLS) error {
727 acc, err := store.OpenAccount(log, senderAccount)
728 if err != nil {
729 return fmt.Errorf("open account: %w", err)
730 }
731 err = acc.DB.Write(ctx, func(tx *bstore.Tx) error {
732 // First delete any existing record.
733 if err := tx.Delete(&store.RecipientDomainTLS{Domain: rdt.Domain}); err != nil && err != bstore.ErrAbsent {
734 return fmt.Errorf("removing previous recipient domain tls status: %w", err)
735 }
736 // Insert new record.
737 return tx.Insert(&rdt)
738 })
739 if err != nil {
740 return fmt.Errorf("adding recipient domain tls status to account database: %w", err)
741 }
742 return nil
743}
744