1package queue
2
3import (
4 "bytes"
5 "context"
6 "errors"
7 "fmt"
8 "io"
9 "net"
10 "os"
11 "strings"
12 "time"
13
14 "golang.org/x/exp/slog"
15
16 "github.com/prometheus/client_golang/prometheus"
17 "github.com/prometheus/client_golang/prometheus/promauto"
18
19 "github.com/mjl-/adns"
20 "github.com/mjl-/bstore"
21
22 "github.com/mjl-/mox/dns"
23 "github.com/mjl-/mox/dsn"
24 "github.com/mjl-/mox/mlog"
25 "github.com/mjl-/mox/mox-"
26 "github.com/mjl-/mox/mtasts"
27 "github.com/mjl-/mox/mtastsdb"
28 "github.com/mjl-/mox/smtp"
29 "github.com/mjl-/mox/smtpclient"
30 "github.com/mjl-/mox/store"
31 "github.com/mjl-/mox/tlsrpt"
32)
33
34var (
35 metricDestinations = promauto.NewCounter(
36 prometheus.CounterOpts{
37 Name: "mox_queue_destinations_total",
38 Help: "Total destination (e.g. MX) lookups for delivery attempts, including those in mox_smtpclient_destinations_authentic_total.",
39 },
40 )
41 metricDestinationsAuthentic = promauto.NewCounter(
42 prometheus.CounterOpts{
43 Name: "mox_queue_destinations_authentic_total",
44 Help: "Destination (e.g. MX) lookups for delivery attempts authenticated with DNSSEC so they are candidates for DANE verification.",
45 },
46 )
47 metricDestinationDANERequired = promauto.NewCounter(
48 prometheus.CounterOpts{
49 Name: "mox_queue_destination_dane_required_total",
50 Help: "Total number of connections to hosts with valid TLSA records making DANE required.",
51 },
52 )
53 metricDestinationDANESTARTTLSUnverified = promauto.NewCounter(
54 prometheus.CounterOpts{
55 Name: "mox_queue_destination_dane_starttlsunverified_total",
56 Help: "Total number of connections with required DANE where all TLSA records were unusable.",
57 },
58 )
59 metricDestinationDANEGatherTLSAErrors = promauto.NewCounter(
60 prometheus.CounterOpts{
61 Name: "mox_queue_destination_dane_gathertlsa_errors_total",
62 Help: "Total number of connections where looking up TLSA records resulted in an error.",
63 },
64 )
65 // todo: recognize when "tls-required-no" message header caused a non-verifying certificate to be overridden. requires doing our own certificate validation after having set tls.Config.InsecureSkipVerify due to tls-required-no.
66 metricTLSRequiredNoIgnored = promauto.NewCounterVec(
67 prometheus.CounterOpts{
68 Name: "mox_queue_tlsrequiredno_ignored_total",
69 Help: "Delivery attempts with TLS policy findings ignored due to message with TLS-Required: No header. Does not cover case where TLS certificate cannot be PKIX-verified.",
70 },
71 []string{
72 "ignored", // mtastspolicy (error getting policy), mtastsmx (mx host not allowed in policy), badtls (error negotiating tls), badtlsa (error fetching dane tlsa records)
73 },
74 )
75 metricRequireTLSUnsupported = promauto.NewCounterVec(
76 prometheus.CounterOpts{
77 Name: "mox_queue_requiretls_unsupported_total",
78 Help: "Delivery attempts that failed due to message with REQUIRETLS.",
79 },
80 []string{
81 "reason", // nopolicy (no mta-sts and no dane), norequiretls (smtp server does not support requiretls)
82 },
83 )
84 metricPlaintextFallback = promauto.NewCounter(
85 prometheus.CounterOpts{
86 Name: "mox_queue_plaintext_fallback_total",
87 Help: "Delivery attempts with fallback to plain text delivery.",
88 },
89 )
90)
91
92// todo: rename function, perhaps put some of the params in a delivery struct so we don't pass all the params all the time?
93func fail(ctx context.Context, qlog mlog.Log, m Msg, backoff time.Duration, permanent bool, remoteMTA dsn.NameIP, secodeOpt, errmsg string) {
94 // todo future: when we implement relaying, we should be able to send DSNs to non-local users. and possibly specify a null mailfrom. ../rfc/5321:1503
95 // todo future: when we implement relaying, and a dsn cannot be delivered, and requiretls was active, we cannot drop the message. instead deliver to local postmaster? though ../rfc/8689:383 may intend to say the dsn should be delivered without requiretls?
96 // todo future: when we implement smtp dsn extension, parameter RET=FULL must be disregarded for messages with REQUIRETLS. ../rfc/8689:379
97
98 if permanent || m.MaxAttempts == 0 && m.Attempts >= 8 || m.MaxAttempts > 0 && m.Attempts >= m.MaxAttempts {
99 qlog.Errorx("permanent failure delivering from queue", errors.New(errmsg))
100 deliverDSNFailure(ctx, qlog, m, remoteMTA, secodeOpt, errmsg)
101
102 if err := queueDelete(context.Background(), m.ID); err != nil {
103 qlog.Errorx("deleting message from queue after permanent failure", err)
104 }
105 return
106 }
107
108 qup := bstore.QueryDB[Msg](context.Background(), DB)
109 qup.FilterID(m.ID)
110 if _, err := qup.UpdateNonzero(Msg{LastError: errmsg, DialedIPs: m.DialedIPs}); err != nil {
111 qlog.Errorx("storing delivery error", err, slog.String("deliveryerror", errmsg))
112 }
113
114 if m.Attempts == 5 {
115 // We've attempted deliveries at these intervals: 0, 7.5m, 15m, 30m, 1h, 2u.
116 // Let sender know delivery is delayed.
117 qlog.Errorx("temporary failure delivering from queue, sending delayed dsn", errors.New(errmsg), slog.Duration("backoff", backoff))
118
119 retryUntil := m.LastAttempt.Add((4 + 8 + 16) * time.Hour)
120 deliverDSNDelay(ctx, qlog, m, remoteMTA, secodeOpt, errmsg, retryUntil)
121 } else {
122 qlog.Errorx("temporary failure delivering from queue", errors.New(errmsg), slog.Duration("backoff", backoff), slog.Time("nextattempt", m.NextAttempt))
123 }
124}
125
// deliverDirect attempts delivery of m by directly dialing the (MX) hosts for the
// destination domain of the message.
//
// On success the message is removed from the queue. On failure, fail is called,
// which either stores the error for a later retry or sends a DSN and removes the
// message. backoff is only passed on to fail. resolver, dialer, ourHostname and
// transportName are passed on to deliverHost for each host tried.
//
// The returned results are for use in a TLSRPT report, it holds success/failure
// counts and failure details for delivery/connection attempts. The
// recipientDomainResult is for policies/counts/failures about the whole recipient
// domain (MTA-STS), its policy type can be empty, in which case there is no
// information (e.g. internal failure). hostResults are per-host details (DANE, one
// per MX target).
func deliverDirect(qlog mlog.Log, resolver dns.Resolver, dialer smtpclient.Dialer, ourHostname dns.Domain, transportName string, m Msg, backoff time.Duration) (recipientDomainResult tlsrpt.Result, hostResults []tlsrpt.Result) {
	// High-level approach:
	// - Resolve domain to deliver to (CNAME), and determine hosts to try to deliver to (MX)
	// - Get MTA-STS policy for domain (optional). If present, only deliver to its
	//   allowlisted hosts and verify TLS against CA pool.
	// - For each host, attempt delivery. If the attempt results in a permanent failure
	//   (as claimed by remote with a 5xx SMTP response, or perhaps decided by us), the
	//   attempt can be aborted. Other errors are often temporary and may result in later
	//   successful delivery. But hopefully the delivery just succeeds. For each host:
	//   - If there is an MTA-STS policy, we only connect to allow-listed hosts.
	//   - We try to lookup DANE records (optional) and verify them if present.
	//   - If RequireTLS is true, we only deliver if the remote SMTP server implements it.
	//   - If RequireTLS is false, we'll fall back to regular delivery attempts without
	//     TLS verification and possibly without TLS at all, ignoring recipient domain/host
	//     MTA-STS and DANE policies.

	// Resolve domain and hosts to attempt delivery to.
	// These next-hop names are often the name under which we find MX records. The
	// expanded name is different from the original if the original was a CNAME,
	// possibly a chain. If there are no MX records, it can be an IP or the host
	// directly.
	origNextHop := m.RecipientDomain.Domain
	// Lookups use the shutdown context, so they are canceled when mox shuts down.
	ctx := mox.Shutdown
	haveMX, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, hosts, permanent, err := smtpclient.GatherDestinations(ctx, qlog.Logger, resolver, m.RecipientDomain)
	if err != nil {
		// If this is a DNSSEC authentication error, we'll collect it for TLS reporting.
		// Hopefully it's a temporary misconfiguration that is solved before we try to send
		// our report. We don't report as "dnssec-invalid", because that is defined as
		// being for DANE. ../rfc/8460:580
		var errCode adns.ErrorCode
		if errors.As(err, &errCode) && errCode.IsAuthentication() {
			// Result: ../rfc/8460:567
			reasonCode := fmt.Sprintf("dns-extended-error-%d-%s", errCode, strings.ReplaceAll(errCode.String(), " ", "-"))
			fd := tlsrpt.Details(tlsrpt.ResultValidationFailure, reasonCode)
			recipientDomainResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, origNextHop, fd)
			recipientDomainResult.Summary.TotalFailureSessionCount++
		}

		// Whether this failure is permanent was decided by GatherDestinations.
		fail(ctx, qlog, m, backoff, permanent, dsn.NameIP{}, "", err.Error())
		return
	}

	// True only if the message explicitly has RequireTLS set to false, i.e. the
	// "TLS-Required: No" message header.
	tlsRequiredNo := m.RequireTLS != nil && !*m.RequireTLS

	// Check for MTA-STS policy and enforce it if needed.
	// We must check at the original next-hop, i.e. recipient domain, not following any
	// CNAMEs. If we were to follow CNAMEs and ask for MTA-STS at that domain, it
	// would only take a single CNAME DNS response to direct us to an unrelated domain.
	var policy *mtasts.Policy // Policy can have mode enforce, testing and none.
	if !origNextHop.IsZero() {
		policy, recipientDomainResult, _, err = mtastsdb.Get(ctx, qlog.Logger, resolver, origNextHop)
		if err != nil {
			if tlsRequiredNo {
				qlog.Infox("mtasts lookup temporary error, continuing due to tls-required-no message header", err, slog.Any("domain", origNextHop))
				metricTLSRequiredNoIgnored.WithLabelValues("mtastspolicy").Inc()
			} else {
				// Treated as a temporary failure, the policy lookup may succeed next attempt.
				qlog.Infox("mtasts lookup temporary error, aborting delivery attempt", err, slog.Any("domain", origNextHop))
				recipientDomainResult.Summary.TotalFailureSessionCount++
				fail(ctx, qlog, m, backoff, false, dsn.NameIP{}, "", err.Error())
				return
			}
		}
		// note: policy can be nil, if a domain does not implement MTA-STS or it's the
		// first time we fetch the policy and if we encountered an error.
	}

	// We try delivery to each host until we have success or a permanent failure. So
	// for transient errors, we'll try the next host. For MX records pointing to a
	// dual stack host, we turn a permanent failure due to policy on the first delivery
	// attempt into a temporary failure and make sure to try the other address family
	// the next attempt. This should reduce issues due to one of our IPs being on a
	// block list. We won't try multiple IPs of the same address family. Surprisingly,
	// RFC 5321 does not specify a clear algorithm, but common practice is probably
	// ../rfc/3974:268.
	var remoteMTA dsn.NameIP
	var secodeOpt, errmsg string
	// Reset: from here on, permanent reflects the result of the last host attempt.
	permanent = false
	// Number of hosts that failed with the "missing REQUIRETLS" enhanced status code.
	nmissingRequireTLS := 0
	// todo: should make distinction between host permanently not accepting the message, and the message not being deliverable permanently. e.g. a mx host may have a size limit, or not accept 8bitmime, while another host in the list does accept the message. same for smtputf8, ../rfc/6531:555
	for _, h := range hosts {
		// ../rfc/8461:913
		if policy != nil && policy.Mode != mtasts.ModeNone && !policy.Matches(h.Domain) {
			// todo: perhaps only send tlsrpt failure if none of the mx hosts matched? reporting about each mismatch seems useful for domain owners, to discover mtasts policies they didn't update after changing mx. there is a risk a domain owner intentionally didn't put all mx'es in the mtasts policy, but they probably won't mind being reported about that.
			// Other error: Surprising that TLSRPT doesn't have an MTA-STS specific error code
			// for this case, it's a big part of the reason to have MTA-STS. ../rfc/8460:610
			// Result: ../rfc/8460:567 todo spec: propose adding a result for this case?
			fd := tlsrpt.Details(tlsrpt.ResultValidationFailure, "mtasts-policy-mx-mismatch")
			fd.ReceivingMXHostname = h.Domain.ASCII
			recipientDomainResult.Add(0, 0, fd)

			// Collect the hosts allowed by the policy, for logging and the error message.
			var policyHosts []string
			for _, mx := range policy.MX {
				policyHosts = append(policyHosts, mx.LogString())
			}
			if policy.Mode == mtasts.ModeEnforce {
				if tlsRequiredNo {
					qlog.Info("mx host does not match mta-sts policy in mode enforce, ignoring due to tls-required-no message header", slog.Any("host", h.Domain), slog.Any("policyhosts", policyHosts))
					metricTLSRequiredNoIgnored.WithLabelValues("mtastsmx").Inc()
				} else {
					// Skip this host. If no later host is attempted, this errmsg is what is
					// passed to fail after the loop.
					errmsg = fmt.Sprintf("mx host %s does not match enforced mta-sts policy with hosts %s", h.Domain, strings.Join(policyHosts, ","))
					qlog.Error("mx host does not match mta-sts policy in mode enforce, skipping", slog.Any("host", h.Domain), slog.Any("policyhosts", policyHosts))
					recipientDomainResult.Summary.TotalFailureSessionCount++
					continue
				}
			} else {
				qlog.Error("mx host does not match mta-sts policy, but it is not enforced, continuing", slog.Any("host", h.Domain), slog.Any("policyhosts", policyHosts))
			}
		}

		qlog.Info("delivering to remote", slog.Any("remote", h))
		// Each host attempt gets a logger with a fresh cid.
		nqlog := qlog.WithCid(mox.Cid())
		var remoteIP net.IP

		// With an enforced MTA-STS policy, STARTTLS with PKIX verification is required.
		// Otherwise we start out with opportunistic TLS.
		enforceMTASTS := policy != nil && policy.Mode == mtasts.ModeEnforce
		tlsMode := smtpclient.TLSOpportunistic
		tlsPKIX := false
		if enforceMTASTS {
			tlsMode = smtpclient.TLSRequiredStartTLS
			tlsPKIX = true
			// note: smtpclient will still go through PKIX verification, and report about it, but not fail the connection if not passing.
		}

		// Try to deliver to host. We can get various errors back. Like permanent failure
		// response codes, TCP, DNSSEC, TLS (opportunistic, i.e. optional with fallback to
		// without), etc. It's a balancing act to handle these situations correctly. We
		// don't want to bounce unnecessarily. But also not keep trying if there is no
		// chance of success.
		//
		// deliverHost will report generic TLS and MTA-STS-specific failures in
		// recipientDomainResult. If DANE is encountered, it will add a DANE reporting
		// result for generic TLS and DANE-specific errors.

		// Set if TLSA records were found. Means TLS is required for this host, usually
		// with verification of the certificate, and that we cannot fall back to
		// opportunistic TLS.
		var tlsDANE bool

		var badTLS, ok bool
		var hostResult tlsrpt.Result
		permanent, tlsDANE, badTLS, secodeOpt, remoteIP, errmsg, hostResult, ok = deliverHost(nqlog, resolver, dialer, ourHostname, transportName, h, enforceMTASTS, haveMX, origNextHopAuthentic, origNextHop, expandedNextHopAuthentic, expandedNextHop, &m, tlsMode, tlsPKIX, &recipientDomainResult)

		// Only keep host results that have a policy type, a zero policy type means
		// deliverHost had no finding to report.
		var zerotype tlsrpt.PolicyType
		if hostResult.Policy.Type != zerotype {
			hostResults = append(hostResults, hostResult)
		}

		// If we had a TLS-related failure when doing TLS, and we don't have a requirement
		// for MTA-STS/DANE, we try again without TLS. This could be an old server that
		// only does ancient TLS versions, or has a misconfiguration. Note that
		// opportunistic TLS does not do regular certificate verification, so that can't be
		// the problem.
		// ../rfc/7435:459
		// We don't fall back to plain text for DMARC reports. ../rfc/7489:1768 ../rfc/7489:2683
		// We queue outgoing TLS reports with tlsRequiredNo, so reports can be delivered in
		// case of broken TLS.
		if !ok && badTLS && (!enforceMTASTS && tlsMode == smtpclient.TLSOpportunistic && !tlsDANE && !m.IsDMARCReport || tlsRequiredNo) {
			metricPlaintextFallback.Inc()
			if tlsRequiredNo {
				metricTLSRequiredNoIgnored.WithLabelValues("badtls").Inc()
			}

			// todo future: add a configuration option to not fall back?
			nqlog.Info("connecting again for delivery attempt without tls",
				slog.Bool("enforcemtasts", enforceMTASTS),
				slog.Bool("tlsdane", tlsDANE),
				slog.Any("requiretls", m.RequireTLS))
			// The TLS-related results of this plaintext attempt are discarded: a throwaway
			// recipient domain result is passed in and the returned host result is ignored.
			permanent, _, _, secodeOpt, remoteIP, errmsg, _, ok = deliverHost(nqlog, resolver, dialer, ourHostname, transportName, h, enforceMTASTS, haveMX, origNextHopAuthentic, origNextHop, expandedNextHopAuthentic, expandedNextHop, &m, smtpclient.TLSSkip, false, &tlsrpt.Result{})
		}

		if ok {
			nqlog.Info("delivered from queue")
			if err := queueDelete(context.Background(), m.ID); err != nil {
				nqlog.Errorx("deleting message from queue after delivery", err)
			}
			return
		}
		// Remember the last host we tried, for use in a DSN.
		remoteMTA = dsn.NameIP{Name: h.XString(false), IP: remoteIP}
		if permanent {
			break
		}
		if secodeOpt == smtp.SePol7MissingReqTLS {
			nmissingRequireTLS++
		}
	}

	// In theory, we could make a failure permanent if we didn't find any mx host
	// matching the mta-sts policy AND the policy is fresh AND all DNS records leading
	// to the MX targets (including CNAME) have a TTL that is beyond the latest
	// possible delivery attempt. Until that time, configuration problems can be
	// corrected through DNS or policy update. Not sure if worth it in practice, there
	// is a good chance the MX records can still change, at least on initial delivery
	// failures.
	// todo: possibly detect that future deliveries will fail due to long ttl's of cached records that are preventing delivery.

	// If we failed due to requiretls not being satisfied, make the delivery permanent.
	// It is unlikely the recipient domain will implement requiretls during our retry
	// period. Best to let the sender know immediately. This only applies if every
	// host failed for this reason.
	if !permanent && nmissingRequireTLS > 0 && nmissingRequireTLS == len(hosts) {
		qlog.Info("marking delivery as permanently failed because recipient domain does not implement requiretls")
		permanent = true
	}

	fail(ctx, qlog, m, backoff, permanent, remoteMTA, secodeOpt, errmsg)
	return
}
339
340// deliverHost attempts to deliver m to host. Depending on tlsMode we'll do
341// opportunistic or required STARTTLS or skip TLS entirely. Based on tlsPKIX we do
342// PKIX/WebPKI verification (for MTA-STS). If we encounter DANE records, we verify
343// those. If the message has a message header "TLS-Required: No", we ignore TLS
344// verification errors.
345//
346// deliverHost updates m.DialedIPs, which must be saved in case of failure to
347// deliver.
348//
349// The haveMX and next-hop-authentic fields are used to determine if DANE is
350// applicable. The next-hop fields themselves are used to determine valid names
351// during DANE TLS certificate verification.
352//
353// The returned hostResult holds TLSRPT reporting results for the connection
354// attempt. Its policy type can be the zero value, indicating there was no finding
355// (e.g. internal error).
356func deliverHost(log mlog.Log, resolver dns.Resolver, dialer smtpclient.Dialer, ourHostname dns.Domain, transportName string, host dns.IPDomain, enforceMTASTS, haveMX, origNextHopAuthentic bool, origNextHop dns.Domain, expandedNextHopAuthentic bool, expandedNextHop dns.Domain, m *Msg, tlsMode smtpclient.TLSMode, tlsPKIX bool, recipientDomainResult *tlsrpt.Result) (permanent, tlsDANE, badTLS bool, secodeOpt string, remoteIP net.IP, errmsg string, hostResult tlsrpt.Result, ok bool) {
357 // About attempting delivery to multiple addresses of a host: ../rfc/5321:3898
358
359 tlsRequiredNo := m.RequireTLS != nil && !*m.RequireTLS
360
361 start := time.Now()
362 var deliveryResult string
363 defer func() {
364 mode := string(tlsMode)
365 if tlsPKIX {
366 mode += "+mtasts"
367 }
368 if tlsDANE {
369 mode += "+dane"
370 }
371 metricDelivery.WithLabelValues(fmt.Sprintf("%d", m.Attempts), transportName, mode, deliveryResult).Observe(float64(time.Since(start)) / float64(time.Second))
372 log.Debug("queue deliverhost result",
373 slog.Any("host", host),
374 slog.Int("attempt", m.Attempts),
375 slog.Any("tlsmode", tlsMode),
376 slog.Bool("tlspkix", tlsPKIX),
377 slog.Bool("tlsdane", tlsDANE),
378 slog.Bool("tlsrequiredno", tlsRequiredNo),
379 slog.Bool("permanent", permanent),
380 slog.Bool("badtls", badTLS),
381 slog.String("secodeopt", secodeOpt),
382 slog.String("errmsg", errmsg),
383 slog.Bool("ok", ok),
384 slog.Duration("duration", time.Since(start)))
385 }()
386
387 // Open message to deliver.
388 f, err := os.Open(m.MessagePath())
389 if err != nil {
390 return false, false, false, "", nil, fmt.Sprintf("open message file: %s", err), hostResult, false
391 }
392 msgr := store.FileMsgReader(m.MsgPrefix, f)
393 defer func() {
394 err := msgr.Close()
395 log.Check(err, "closing message after delivery attempt")
396 }()
397
398 ctx, cancel := context.WithTimeout(mox.Shutdown, 30*time.Second)
399 defer cancel()
400
401 // We must lookup the IPs for the host name before checking DANE TLSA records. And
402 // only check TLSA records for secure responses. This prevents problems with old
403 // name servers returning an error for TLSA requests or letting it timeout (not
404 // sending a response). ../rfc/7672:879
405 var daneRecords []adns.TLSA
406 var tlsHostnames []dns.Domain
407 if host.IsDomain() {
408 tlsHostnames = []dns.Domain{host.Domain}
409 }
410 if m.DialedIPs == nil {
411 m.DialedIPs = map[string][]net.IP{}
412 }
413
414 countResultFailure := func() {
415 recipientDomainResult.Summary.TotalFailureSessionCount++
416 hostResult.Summary.TotalFailureSessionCount++
417 }
418
419 metricDestinations.Inc()
420 authentic, expandedAuthentic, expandedHost, ips, dualstack, err := smtpclient.GatherIPs(ctx, log.Logger, resolver, host, m.DialedIPs)
421 destAuthentic := err == nil && authentic && origNextHopAuthentic && (!haveMX || expandedNextHopAuthentic) && host.IsDomain()
422 if !destAuthentic {
423 log.Debugx("not attempting verification with dane", err, slog.Bool("authentic", authentic), slog.Bool("expandedauthentic", expandedAuthentic))
424
425 // Track a DNSSEC error if found.
426 var errCode adns.ErrorCode
427 if err != nil {
428 if errors.As(err, &errCode) && errCode.IsAuthentication() {
429 // Result: ../rfc/8460:567
430 reasonCode := fmt.Sprintf("dns-extended-error-%d-%s", errCode, strings.ReplaceAll(errCode.String(), " ", "-"))
431 fd := tlsrpt.Details(tlsrpt.ResultValidationFailure, reasonCode)
432 hostResult = tlsrpt.MakeResult(tlsrpt.TLSA, host.Domain, fd)
433 countResultFailure()
434 }
435 } else {
436 // todo: we could lookup tlsa records, and log an error when they are not dnssec-signed. this should be interpreted simply as "not doing dane", but it could be useful to warn domain owners about, they may be under the impression they are dane-protected.
437 hostResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, host.Domain)
438 }
439 } else if tlsMode == smtpclient.TLSSkip {
440 metricDestinationsAuthentic.Inc()
441
442 // TLSSkip is used to fallback to plaintext, which is used with a TLS-Required: No
443 // header to ignore the recipient domain's DANE policy.
444
445 // possible err is propagated to below.
446 } else {
447 metricDestinationsAuthentic.Inc()
448
449 // Look for TLSA records in either the expandedHost, or otherwise the original
450 // host. ../rfc/7672:912
451 var tlsaBaseDomain dns.Domain
452 tlsDANE, daneRecords, tlsaBaseDomain, err = smtpclient.GatherTLSA(ctx, log.Logger, resolver, host.Domain, expandedNextHopAuthentic && expandedAuthentic, expandedHost)
453 if tlsDANE {
454 metricDestinationDANERequired.Inc()
455 }
456 if err != nil {
457 metricDestinationDANEGatherTLSAErrors.Inc()
458 }
459 if err == nil && tlsDANE {
460 tlsMode = smtpclient.TLSRequiredStartTLS
461 hostResult = tlsrpt.Result{Policy: tlsrpt.TLSAPolicy(daneRecords, tlsaBaseDomain)}
462 if len(daneRecords) == 0 {
463 // If there are no usable DANE records, we still have to use TLS, but without
464 // verifying its certificate. At least when there is no MTA-STS. Why? Perhaps to
465 // prevent ossification? The SMTP TLSA specification has different behaviour than
466 // the generic TLSA. "Usable" means different things in different places.
467 // ../rfc/7672:718 ../rfc/6698:1845 ../rfc/6698:660
468 log.Debug("no usable dane records, requiring starttls but not verifying with dane")
469 metricDestinationDANESTARTTLSUnverified.Inc()
470 daneRecords = nil
471 // Result: ../rfc/8460:576 (this isn't technicall invalid, only all-unusable...)
472 hostResult.FailureDetails = []tlsrpt.FailureDetails{
473 {
474 ResultType: tlsrpt.ResultTLSAInvalid,
475 ReceivingMXHostname: host.XString(false),
476 FailureReasonCode: "all-unusable-records+ignored",
477 },
478 }
479 } else {
480 log.Debug("delivery with required starttls with dane verification", slog.Any("allowedtlshostnames", tlsHostnames))
481 }
482 // Based on CNAMEs followed and DNSSEC-secure status, we must allow up to 4 host
483 // names.
484 tlsHostnames = smtpclient.GatherTLSANames(haveMX, expandedNextHopAuthentic, expandedAuthentic, origNextHop, expandedNextHop, host.Domain, tlsaBaseDomain)
485 } else if !tlsDANE {
486 log.Debugx("not doing opportunistic dane after gathering tlsa records", err)
487 err = nil
488 hostResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, tlsaBaseDomain)
489 } else if err != nil {
490 fd := tlsrpt.Details(tlsrpt.ResultTLSAInvalid, "")
491 var errCode adns.ErrorCode
492 if errors.As(err, &errCode) {
493 fd.FailureReasonCode = fmt.Sprintf("extended-dns-error-%d-%s", errCode, strings.ReplaceAll(errCode.String(), " ", "-"))
494 if errCode.IsAuthentication() {
495 // Result: ../rfc/8460:580
496 fd.ResultType = tlsrpt.ResultDNSSECInvalid
497 countResultFailure()
498 }
499 }
500 hostResult = tlsrpt.Result{
501 Policy: tlsrpt.TLSAPolicy(daneRecords, tlsaBaseDomain),
502 FailureDetails: []tlsrpt.FailureDetails{fd},
503 }
504
505 if tlsRequiredNo {
506 log.Debugx("error gathering dane tlsa records with dane required, but continuing without validation due to tls-required-no message header", err)
507 err = nil
508 metricTLSRequiredNoIgnored.WithLabelValues("badtlsa").Inc()
509 }
510 }
511 // else, err is propagated below.
512 }
513
514 // todo: for requiretls, should an MTA-STS policy in mode testing be treated as good enough for requiretls? let's be strict and assume not.
515 // todo: ../rfc/8689:276 seems to specify stricter requirements on name in certificate than DANE (which allows original recipient domain name and cname-expanded name, and hints at following CNAME for MX targets as well, allowing both their original and expanded names too). perhaps the intent was just to say the name must be validated according to the relevant specifications?
516 // todo: for requiretls, should we allow no usable dane records with requiretls? dane allows it, but doesn't seem in spirit of requiretls, so not allowing it.
517 if err == nil && m.RequireTLS != nil && *m.RequireTLS && !(tlsDANE && len(daneRecords) > 0) && !enforceMTASTS {
518 log.Info("verified tls is required, but destination has no usable dane records and no mta-sts policy, canceling delivery attempt to host")
519 metricRequireTLSUnsupported.WithLabelValues("nopolicy").Inc()
520 // Resond with proper enhanced status code. ../rfc/8689:301
521 return false, tlsDANE, false, smtp.SePol7MissingReqTLS, remoteIP, "missing required tls verification mechanism", hostResult, false
522 }
523
524 // Dial the remote host given the IPs if no error yet.
525 var conn net.Conn
526 if err == nil {
527 if m.DialedIPs == nil {
528 m.DialedIPs = map[string][]net.IP{}
529 }
530 conn, remoteIP, err = smtpclient.Dial(ctx, log.Logger, dialer, host, ips, 25, m.DialedIPs, mox.Conf.Static.SpecifiedSMTPListenIPs)
531 }
532 cancel()
533
534 // Set error for metrics.
535 var result string
536 switch {
537 case err == nil:
538 result = "ok"
539 case errors.Is(err, os.ErrDeadlineExceeded), errors.Is(err, context.DeadlineExceeded):
540 result = "timeout"
541 case errors.Is(err, context.Canceled):
542 result = "canceled"
543 default:
544 result = "error"
545 }
546 metricConnection.WithLabelValues(result).Inc()
547 if err != nil {
548 log.Debugx("connecting to remote smtp", err, slog.Any("host", host))
549 return false, tlsDANE, false, "", remoteIP, fmt.Sprintf("dialing smtp server: %v", err), hostResult, false
550 }
551
552 var mailFrom string
553 if m.SenderLocalpart != "" || !m.SenderDomain.IsZero() {
554 mailFrom = m.Sender().XString(m.SMTPUTF8)
555 }
556 rcptTo := m.Recipient().XString(m.SMTPUTF8)
557
558 // todo future: get closer to timeouts specified in rfc? ../rfc/5321:3610
559 log = log.With(slog.Any("remoteip", remoteIP))
560 ctx, cancel = context.WithTimeout(mox.Shutdown, 30*time.Minute)
561 defer cancel()
562 mox.Connections.Register(conn, "smtpclient", "queue")
563
564 // Initialize SMTP session, sending EHLO/HELO and STARTTLS with specified tls mode.
565 var firstHost dns.Domain
566 var moreHosts []dns.Domain
567 if len(tlsHostnames) > 0 {
568 // For use with DANE-TA.
569 firstHost = tlsHostnames[0]
570 moreHosts = tlsHostnames[1:]
571 }
572 var verifiedRecord adns.TLSA
573 opts := smtpclient.Opts{
574 IgnoreTLSVerifyErrors: tlsRequiredNo,
575 RootCAs: mox.Conf.Static.TLS.CertPool,
576 DANERecords: daneRecords,
577 DANEMoreHostnames: moreHosts,
578 DANEVerifiedRecord: &verifiedRecord,
579 RecipientDomainResult: recipientDomainResult,
580 HostResult: &hostResult,
581 }
582 sc, err := smtpclient.New(ctx, log.Logger, conn, tlsMode, tlsPKIX, ourHostname, firstHost, opts)
583 defer func() {
584 if sc == nil {
585 conn.Close()
586 } else {
587 sc.Close()
588 }
589 mox.Connections.Unregister(conn)
590 }()
591 if err == nil && m.SenderAccount != "" {
592 // Remember the STARTTLS and REQUIRETLS support for this recipient domain.
593 // It is used in the webmail client, to show the recipient domain security mechanisms.
594 // We always save only the last connection we actually encountered. There may be
595 // multiple MX hosts, perhaps only some support STARTTLS and REQUIRETLS. We may not
596 // be accurate for the whole domain, but we're only storing a hint.
597 rdt := store.RecipientDomainTLS{
598 Domain: m.RecipientDomain.Domain.Name(),
599 STARTTLS: sc.TLSConnectionState() != nil,
600 RequireTLS: sc.SupportsRequireTLS(),
601 }
602 if err = updateRecipientDomainTLS(ctx, log, m.SenderAccount, rdt); err != nil {
603 err = fmt.Errorf("storing recipient domain tls status: %w", err)
604 }
605 }
606 if err == nil {
607 // SMTP session is ready. Finally try to actually deliver.
608 has8bit := m.Has8bit
609 smtputf8 := m.SMTPUTF8
610 var msg io.Reader = msgr
611 size := m.Size
612 if m.DSNUTF8 != nil && sc.Supports8BITMIME() && sc.SupportsSMTPUTF8() {
613 has8bit = true
614 smtputf8 = true
615 size = int64(len(m.DSNUTF8))
616 msg = bytes.NewReader(m.DSNUTF8)
617 }
618 err = sc.Deliver(ctx, mailFrom, rcptTo, size, msg, has8bit, smtputf8, m.RequireTLS != nil && *m.RequireTLS)
619 }
620 if err != nil {
621 log.Infox("delivery failed", err)
622 }
623 var cerr smtpclient.Error
624 switch {
625 case err == nil:
626 deliveryResult = "ok"
627 case errors.Is(err, os.ErrDeadlineExceeded), errors.Is(err, context.DeadlineExceeded):
628 deliveryResult = "timeout"
629 case errors.Is(err, context.Canceled):
630 deliveryResult = "canceled"
631 case errors.As(err, &cerr):
632 deliveryResult = "temperror"
633 if cerr.Permanent {
634 deliveryResult = "permerror"
635 }
636 default:
637 deliveryResult = "error"
638 }
639 if err == nil {
640 return false, tlsDANE, false, "", remoteIP, "", hostResult, true
641 } else if cerr, ok := err.(smtpclient.Error); ok {
642 // If we are being rejected due to policy reasons on the first
643 // attempt and remote has both IPv4 and IPv6, we'll give it
644 // another try. Our first IP may be in a block list, the address for
645 // the other family perhaps is not.
646 permanent := cerr.Permanent
647 if permanent && m.Attempts == 1 && dualstack && strings.HasPrefix(cerr.Secode, "7.") {
648 permanent = false
649 }
650 // If server does not implement requiretls, respond with that code. ../rfc/8689:301
651 secode := cerr.Secode
652 if errors.Is(cerr.Err, smtpclient.ErrRequireTLSUnsupported) {
653 secode = smtp.SePol7MissingReqTLS
654 metricRequireTLSUnsupported.WithLabelValues("norequiretls").Inc()
655 }
656 return permanent, tlsDANE, errors.Is(cerr, smtpclient.ErrTLS), secode, remoteIP, cerr.Error(), hostResult, false
657 } else {
658 return false, tlsDANE, errors.Is(cerr, smtpclient.ErrTLS), "", remoteIP, err.Error(), hostResult, false
659 }
660}
661
662// Update (overwite) last known starttls/requiretls support for recipient domain.
663func updateRecipientDomainTLS(ctx context.Context, log mlog.Log, senderAccount string, rdt store.RecipientDomainTLS) error {
664 acc, err := store.OpenAccount(log, senderAccount)
665 if err != nil {
666 return fmt.Errorf("open account: %w", err)
667 }
668 err = acc.DB.Write(ctx, func(tx *bstore.Tx) error {
669 // First delete any existing record.
670 if err := tx.Delete(&store.RecipientDomainTLS{Domain: rdt.Domain}); err != nil && err != bstore.ErrAbsent {
671 return fmt.Errorf("removing previous recipient domain tls status: %w", err)
672 }
673 // Insert new record.
674 return tx.Insert(&rdt)
675 })
676 if err != nil {
677 return fmt.Errorf("adding recipient domain tls status to account database: %w", err)
678 }
679 return nil
680}
681