1package smtpclient
2
3import (
4 "context"
5 "crypto/sha256"
6 "crypto/sha512"
7 "crypto/x509"
8 "errors"
9 "fmt"
10 "net"
11 "sort"
12 "strings"
13 "time"
14
15 "golang.org/x/exp/slog"
16
17 "github.com/mjl-/adns"
18
19 "github.com/mjl-/mox/dns"
20 "github.com/mjl-/mox/mlog"
21)
22
// Sentinel errors for gathering delivery destinations. They are wrapped with
// more detail (domain names, underlying error) before being returned.
var (
	// errCNAMELimit is used when a chain of CNAME records is longer than we are
	// willing to follow.
	errCNAMELimit = errors.New("too many cname records")

	// errCNAMELoop is used when following CNAME records leads back to a name that
	// was already seen.
	errCNAMELoop = errors.New("cname loop")

	// errDNS is used for DNS lookup failures and for names in DNS responses that
	// cannot be parsed.
	errDNS = errors.New("dns lookup error")

	// errNoMail is used when the only MX record of a domain has "." as target.
	errNoMail = errors.New("domain does not accept email as indicated with single dot for mx record")
)
29
30// GatherDestinations looks up the hosts to deliver email to a domain ("next-hop").
31// If it is an IP address, it is the only destination to try. Otherwise CNAMEs of
32// the domain are followed. Then MX records for the expanded CNAME are looked up.
33// If no MX record is present, the original domain is returned. If an MX record is
34// present but indicates the domain does not accept email, ErrNoMail is returned.
35// If valid MX records were found, the MX target hosts are returned.
36//
37// haveMX indicates if an MX record was found.
38//
39// origNextHopAuthentic indicates if the DNS record for the initial domain name was
40// DNSSEC secure (CNAME, MX).
41//
42// expandedNextHopAuthentic indicates if the DNS records after following CNAMEs were
43// DNSSEC secure.
44//
45// These authentic results are needed for DANE, to determine where to look up TLSA
46// records, and which names to allow in the remote TLS certificate. If MX records
47// were found, both the original and expanded next-hops must be authentic for DANE
48// to be option. For a non-IP with no MX records found, the authentic result can
49// be used to decide which of the names to use as TLSA base domain.
50func GatherDestinations(ctx context.Context, elog *slog.Logger, resolver dns.Resolver, origNextHop dns.IPDomain) (haveMX, origNextHopAuthentic, expandedNextHopAuthentic bool, expandedNextHop dns.Domain, hosts []dns.IPDomain, permanent bool, err error) {
51 // ../rfc/5321:3824
52
53 log := mlog.New("smtpclient", elog)
54
55 // IP addresses are dialed directly, and don't have TLSA records.
56 if len(origNextHop.IP) > 0 {
57 return false, false, false, expandedNextHop, []dns.IPDomain{origNextHop}, false, nil
58 }
59
60 // We start out assuming the result is authentic. Updated with each lookup.
61 origNextHopAuthentic = true
62 expandedNextHopAuthentic = true
63
64 // We start out delivering to the recipient domain. We follow CNAMEs.
65 rcptDomain := origNextHop.Domain
66 // Domain we are actually delivering to, after following CNAME record(s).
67 expandedNextHop = rcptDomain
68 // Keep track of CNAMEs we have followed, to detect loops.
69 domainsSeen := map[string]bool{}
70 for i := 0; ; i++ {
71 if domainsSeen[expandedNextHop.ASCII] {
72 // todo: only mark as permanent failure if TTLs for all records are beyond latest possibly delivery retry we would do.
73 err := fmt.Errorf("%w: recipient domain %s: already saw %s", errCNAMELoop, rcptDomain, expandedNextHop)
74 return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
75 }
76 domainsSeen[expandedNextHop.ASCII] = true
77
78 // note: The Go resolver returns the requested name if the domain has no CNAME
79 // record but has a host record.
80 if i == 16 {
81 // We have a maximum number of CNAME records we follow. There is no hard limit for
82 // DNS, and you might think folks wouldn't configure CNAME chains at all, but for
83 // (non-mail) domains, CNAME chains of 10 records have been encountered according
84 // to the internet.
85 // todo: only mark as permanent failure if TTLs for all records are beyond latest possibly delivery retry we would do.
86 err := fmt.Errorf("%w: recipient domain %s, last resolved domain %s", errCNAMELimit, rcptDomain, expandedNextHop)
87 return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
88 }
89
90 // Do explicit CNAME lookup. Go's LookupMX also resolves CNAMEs, but we want to
91 // know the final name, and we're interested in learning if the first vs later
92 // results were DNSSEC-(in)secure.
93 // ../rfc/5321:3838 ../rfc/3974:197
94 cctx, ccancel := context.WithTimeout(ctx, 30*time.Second)
95 defer ccancel()
96 cname, cnameResult, err := resolver.LookupCNAME(cctx, expandedNextHop.ASCII+".")
97 ccancel()
98 if i == 0 {
99 origNextHopAuthentic = origNextHopAuthentic && cnameResult.Authentic
100 }
101 expandedNextHopAuthentic = expandedNextHopAuthentic && cnameResult.Authentic
102 if err != nil && !dns.IsNotFound(err) {
103 err = fmt.Errorf("%w: cname lookup for %s: %v", errDNS, expandedNextHop, err)
104 return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
105 }
106 if err == nil && cname != expandedNextHop.ASCII+"." {
107 d, err := dns.ParseDomain(strings.TrimSuffix(cname, "."))
108 if err != nil {
109 // todo: only mark as permanent failure if TTLs for all records are beyond latest possibly delivery retry we would do.
110 err = fmt.Errorf("%w: parsing cname domain %s: %v", errDNS, expandedNextHop, err)
111 return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
112 }
113 expandedNextHop = d
114 // Start again with new domain.
115 continue
116 }
117
118 // Not a CNAME, so lookup MX record.
119 mctx, mcancel := context.WithTimeout(ctx, 30*time.Second)
120 defer mcancel()
121 // Note: LookupMX can return an error and still return records: Invalid records are
122 // filtered out and an error returned. We must process any records that are valid.
123 // Only if all are unusable will we return an error. ../rfc/5321:3851
124 mxl, mxResult, err := resolver.LookupMX(mctx, expandedNextHop.ASCII+".")
125 mcancel()
126 if i == 0 {
127 origNextHopAuthentic = origNextHopAuthentic && mxResult.Authentic
128 }
129 expandedNextHopAuthentic = expandedNextHopAuthentic && mxResult.Authentic
130 if err != nil && len(mxl) == 0 {
131 if !dns.IsNotFound(err) {
132 err = fmt.Errorf("%w: mx lookup for %s: %v", errDNS, expandedNextHop, err)
133 return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, false, err
134 }
135
136 // No MX record, attempt delivery directly to host. ../rfc/5321:3842
137 hosts = []dns.IPDomain{{Domain: expandedNextHop}}
138 return false, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, hosts, false, nil
139 } else if err != nil {
140 log.Infox("mx record has some invalid records, keeping only the valid mx records", err)
141 }
142
143 // ../rfc/7505:122
144 if err == nil && len(mxl) == 1 && mxl[0].Host == "." {
145 // Note: Depending on MX record TTL, this record may be replaced with a more
146 // receptive MX record before our final delivery attempt. But it's clearly the
147 // explicit desire not to be bothered with email delivery attempts, so mark failure
148 // as permanent.
149 return true, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, true, errNoMail
150 }
151
152 // The Go resolver already sorts by preference, randomizing records of same
153 // preference. ../rfc/5321:3885
154 for _, mx := range mxl {
155 // Parsing lax (unless pedantic mode) for MX targets with underscores as seen in the wild.
156 host, err := dns.ParseDomainLax(strings.TrimSuffix(mx.Host, "."))
157 if err != nil {
158 // note: should not happen because Go resolver already filters these out.
159 err = fmt.Errorf("%w: invalid host name in mx record %q: %v", errDNS, mx.Host, err)
160 return true, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, nil, true, err
161 }
162 hosts = append(hosts, dns.IPDomain{Domain: host})
163 }
164 if len(hosts) > 0 {
165 err = nil
166 }
167 return true, origNextHopAuthentic, expandedNextHopAuthentic, expandedNextHop, hosts, false, err
168 }
169}
170
171// GatherIPs looks up the IPs to try for connecting to host, with the IPs ordered
172// to take previous attempts into account. For use with DANE, the CNAME-expanded
173// name is returned, and whether the DNS responses were authentic.
174func GatherIPs(ctx context.Context, elog *slog.Logger, resolver dns.Resolver, host dns.IPDomain, dialedIPs map[string][]net.IP) (authentic bool, expandedAuthentic bool, expandedHost dns.Domain, ips []net.IP, dualstack bool, rerr error) {
175 log := mlog.New("smtpclient", elog)
176
177 if len(host.IP) > 0 {
178 return false, false, dns.Domain{}, []net.IP{host.IP}, false, nil
179 }
180
181 authentic = true
182 expandedAuthentic = true
183
184 // The Go resolver automatically follows CNAMEs, which is not allowed for host
185 // names in MX records, but seems to be accepted and is documented for DANE SMTP
186 // behaviour. We resolve CNAMEs explicitly, so we can return the final name, which
187 // DANE needs. ../rfc/7671:246
188 // ../rfc/5321:3861 ../rfc/2181:661 ../rfc/7672:1382 ../rfc/7671:1030
189 name := host.Domain.ASCII + "."
190
191 for i := 0; ; i++ {
192 cname, result, err := resolver.LookupCNAME(ctx, name)
193 if i == 0 {
194 authentic = result.Authentic
195 }
196 expandedAuthentic = expandedAuthentic && result.Authentic
197 if dns.IsNotFound(err) {
198 break
199 } else if err != nil {
200 return authentic, expandedAuthentic, dns.Domain{}, nil, dualstack, err
201 } else if strings.TrimSuffix(cname, ".") == strings.TrimSuffix(name, ".") {
202 break
203 }
204 if i > 10 {
205 return authentic, expandedAuthentic, dns.Domain{}, nil, dualstack, fmt.Errorf("mx lookup: %w", errCNAMELimit)
206 }
207 name = strings.TrimSuffix(cname, ".") + "."
208 }
209
210 if name == host.Domain.ASCII+"." {
211 expandedHost = host.Domain
212 } else {
213 var err error
214 expandedHost, err = dns.ParseDomain(strings.TrimSuffix(name, "."))
215 if err != nil {
216 return authentic, expandedAuthentic, dns.Domain{}, nil, dualstack, fmt.Errorf("parsing cname-resolved domain: %w", err)
217 }
218 }
219
220 ipaddrs, result, err := resolver.LookupIPAddr(ctx, name)
221 authentic = authentic && result.Authentic
222 expandedAuthentic = expandedAuthentic && result.Authentic
223 if err != nil || len(ipaddrs) == 0 {
224 return authentic, expandedAuthentic, expandedHost, nil, false, fmt.Errorf("looking up %q: %w", name, err)
225 }
226 var have4, have6 bool
227 for _, ipaddr := range ipaddrs {
228 ips = append(ips, ipaddr.IP)
229 if ipaddr.IP.To4() == nil {
230 have6 = true
231 } else {
232 have4 = true
233 }
234 }
235 dualstack = have4 && have6
236 prevIPs := dialedIPs[host.String()]
237 if len(prevIPs) > 0 {
238 prevIP := prevIPs[len(prevIPs)-1]
239 prevIs4 := prevIP.To4() != nil
240 sameFamily := 0
241 for _, ip := range prevIPs {
242 is4 := ip.To4() != nil
243 if prevIs4 == is4 {
244 sameFamily++
245 }
246 }
247 preferPrev := sameFamily == 1
248 // We use stable sort so any preferred/randomized listing from DNS is kept intact.
249 sort.SliceStable(ips, func(i, j int) bool {
250 aIs4 := ips[i].To4() != nil
251 bIs4 := ips[j].To4() != nil
252 if aIs4 != bIs4 {
253 // Prefer "i" if it is not same address family.
254 return aIs4 != prevIs4
255 }
256 // Prefer "i" if it is the same as last and we should be preferring it.
257 return preferPrev && ips[i].Equal(prevIP)
258 })
259 log.Debug("ordered ips for dialing", slog.Any("ips", ips))
260 }
261 return
262}
263
264// GatherTLSA looks up TLSA record for either expandedHost or host, and returns
265// records usable for DANE with SMTP, and host names to allow in DANE-TA
266// certificate name verification.
267//
268// If no records are found, this isn't necessarily an error. It can just indicate
269// the domain/host does not opt-in to DANE, and nil records and a nil error are
270// returned.
271//
272// Only usable records are returned. If any record was found, DANE is required and
273// this is indicated with daneRequired. If no usable records remain, the caller
274// must do TLS, but not verify the remote TLS certificate.
275//
276// Returned values are always meaningful, also when an error was returned.
277func GatherTLSA(ctx context.Context, elog *slog.Logger, resolver dns.Resolver, host dns.Domain, expandedAuthentic bool, expandedHost dns.Domain) (daneRequired bool, daneRecords []adns.TLSA, tlsaBaseDomain dns.Domain, err error) {
278 log := mlog.New("smtpclient", elog)
279
280 // ../rfc/7672:912
281 // This function is only called when the lookup of host was authentic.
282
283 var l []adns.TLSA
284
285 tlsaBaseDomain = host
286 if host == expandedHost || !expandedAuthentic {
287 l, err = lookupTLSACNAME(ctx, log, resolver, 25, "tcp", host)
288 } else if expandedAuthentic {
289 // ../rfc/7672:934
290 tlsaBaseDomain = expandedHost
291 l, err = lookupTLSACNAME(ctx, log, resolver, 25, "tcp", expandedHost)
292 if err == nil && len(l) == 0 {
293 tlsaBaseDomain = host
294 l, err = lookupTLSACNAME(ctx, log, resolver, 25, "tcp", host)
295 }
296 }
297 if len(l) == 0 || err != nil {
298 daneRequired = err != nil
299 log.Debugx("gathering tlsa records failed", err, slog.Bool("danerequired", daneRequired), slog.Any("basedomain", tlsaBaseDomain))
300 return daneRequired, nil, tlsaBaseDomain, err
301 }
302 daneRequired = len(l) > 0
303 l = filterUsableTLSARecords(log, l)
304 log.Debug("tlsa records exist",
305 slog.Bool("danerequired", daneRequired),
306 slog.Any("records", l),
307 slog.Any("basedomain", tlsaBaseDomain))
308 return daneRequired, l, tlsaBaseDomain, err
309}
310
311// lookupTLSACNAME composes a TLSA domain name to lookup, follows CNAMEs and looks
312// up TLSA records. no TLSA records exist, a nil error is returned as it means
313// the host does not opt-in to DANE.
314func lookupTLSACNAME(ctx context.Context, log mlog.Log, resolver dns.Resolver, port int, protocol string, host dns.Domain) (l []adns.TLSA, rerr error) {
315 name := fmt.Sprintf("_%d._%s.%s", port, protocol, host.ASCII+".")
316 for i := 0; ; i++ {
317 cname, result, err := resolver.LookupCNAME(ctx, name)
318 if dns.IsNotFound(err) {
319 if !result.Authentic {
320 log.Debugx("cname nxdomain result during tlsa lookup not authentic, not doing dane for host", err, slog.Any("host", host), slog.String("name", name))
321 return nil, nil
322 }
323 break
324 } else if err != nil {
325 return nil, fmt.Errorf("looking up cname for tlsa candidate base domain: %w", err)
326 } else if !result.Authentic {
327 log.Debugx("cname result during tlsa lookup not authentic, not doing dane for host", err, slog.Any("host", host), slog.String("name", name))
328 return nil, nil
329 }
330 if i == 10 {
331 return nil, fmt.Errorf("looking up cname for tlsa candidate base domain: %w", errCNAMELimit)
332 }
333 name = strings.TrimSuffix(cname, ".") + "."
334 }
335 var result adns.Result
336 var err error
337 l, result, err = resolver.LookupTLSA(ctx, 0, "", name)
338 if dns.IsNotFound(err) || err == nil && len(l) == 0 {
339 log.Debugx("no tlsa records for host, not doing dane", err,
340 slog.Any("host", host),
341 slog.String("name", name),
342 slog.Bool("authentic", result.Authentic))
343 return nil, nil
344 } else if err != nil {
345 return nil, fmt.Errorf("looking up tlsa records for tlsa candidate base domain: %w", err)
346 } else if !result.Authentic {
347 log.Debugx("tlsa lookup not authentic, not doing dane for host", err, slog.Any("host", host), slog.String("name", name))
348 return nil, nil
349 }
350 return l, nil
351}
352
353func filterUsableTLSARecords(log mlog.Log, l []adns.TLSA) []adns.TLSA {
354 // Gather "usable" records. ../rfc/7672:708
355 o := 0
356 for _, r := range l {
357 // A record is not usable when we don't recognize parameters. ../rfc/6698:649
358
359 switch r.Usage {
360 case adns.TLSAUsageDANETA, adns.TLSAUsageDANEEE:
361 default:
362 // We can regard PKIX-TA and PKIX-EE as "unusable" with SMTP DANE. ../rfc/7672:1304
363 continue
364 }
365 switch r.Selector {
366 case adns.TLSASelectorCert, adns.TLSASelectorSPKI:
367 default:
368 continue
369 }
370 switch r.MatchType {
371 case adns.TLSAMatchTypeFull:
372 if r.Selector == adns.TLSASelectorCert {
373 if _, err := x509.ParseCertificate(r.CertAssoc); err != nil {
374 log.Debugx("parsing certificate in dane tlsa record, ignoring", err)
375 continue
376 }
377 } else if r.Selector == adns.TLSASelectorSPKI {
378 if _, err := x509.ParsePKIXPublicKey(r.CertAssoc); err != nil {
379 log.Debugx("parsing certificate in dane tlsa record, ignoring", err)
380 continue
381 }
382 }
383 case adns.TLSAMatchTypeSHA256:
384 if len(r.CertAssoc) != sha256.Size {
385 log.Debug("dane tlsa record with wrong data size for sha2-256", slog.Int("got", len(r.CertAssoc)), slog.Int("expect", sha256.Size))
386 continue
387 }
388 case adns.TLSAMatchTypeSHA512:
389 if len(r.CertAssoc) != sha512.Size {
390 log.Debug("dane tlsa record with wrong data size for sha2-512", slog.Int("got", len(r.CertAssoc)), slog.Int("expect", sha512.Size))
391 continue
392 }
393 default:
394 continue
395 }
396
397 l[o] = r
398 o++
399 }
400 return l[:o]
401}
402
403// GatherTLSANames returns the allowed names in TLS certificates for verification
404// with PKIX-* or DANE-TA. The first name should be used for SNI.
405//
406// If there was no MX record, the next-hop domain parameters (i.e. the original
407// email destination host, and its CNAME-expanded host, that has MX records) are
408// ignored and only the base domain parameters are taken into account.
409func GatherTLSANames(haveMX, expandedNextHopAuthentic, expandedTLSABaseDomainAuthentic bool, origNextHop, expandedNextHop, origTLSABaseDomain, expandedTLSABaseDomain dns.Domain) []dns.Domain {
410 // Gather the names to check against TLS certificate. ../rfc/7672:1318
411 if !haveMX {
412 // ../rfc/7672:1336
413 if !expandedTLSABaseDomainAuthentic || origTLSABaseDomain == expandedTLSABaseDomain {
414 return []dns.Domain{origTLSABaseDomain}
415 }
416 return []dns.Domain{expandedTLSABaseDomain, origTLSABaseDomain}
417 } else if expandedNextHopAuthentic {
418 // ../rfc/7672:1326
419 var l []dns.Domain
420 if expandedTLSABaseDomainAuthentic {
421 l = []dns.Domain{expandedTLSABaseDomain}
422 }
423 if expandedTLSABaseDomain != origTLSABaseDomain {
424 l = append(l, origTLSABaseDomain)
425 }
426 l = append(l, origNextHop)
427 if origNextHop != expandedNextHop {
428 l = append(l, expandedNextHop)
429 }
430 return l
431 } else {
432 // We don't attempt DANE after insecure MX, but behaviour for it is specified.
433 // ../rfc/7672:1332
434 return []dns.Domain{origNextHop}
435 }
436}
437