1// Package dane verifies TLS certificates through DNSSEC-verified TLSA records.
2//
3// On the internet, TLS certificates are commonly verified by checking if they are
4// signed by one of many commonly trusted Certificate Authorities (CAs). This is
5// PKIX or WebPKI. With DANE, TLS certificates are verified through
6// DNSSEC-protected DNS records of type TLSA. These TLSA records specify the rules
7// for verification ("usage") and whether a full certificate ("selector" cert) is
8// checked or only its "subject public key info" ("selector" spki). The (hash of)
9// the certificate or "spki" is included in the TLSA record ("matchtype").
10//
11// DANE SMTP connections have two allowed "usages" (verification rules):
12// - DANE-EE, which only checks if the certificate or spki match, without the
13// WebPKI verification of expiration, name or signed-by-trusted-party verification.
14// - DANE-TA, which does verification similar to PKIX/WebPKI, but verifies against
15// a certificate authority ("trust anchor", or "TA") specified in the TLSA record
16// instead of the CA pool.
17//
18// DANE has two more "usages", that may be used with protocols other than SMTP:
19// - PKIX-EE, which matches the certificate or spki, and also verifies the
20// certificate against the CA pool.
21// - PKIX-TA, which verifies the certificate or spki against a "trust anchor"
22// specified in the TLSA record, that also has to be trusted by the CA pool.
23//
24// TLSA records are looked up for a specific port number, protocol (tcp/udp) and
25// host name. Each port can have different TLSA records. TLSA records must be
26// signed and verified with DNSSEC before they can be trusted and used.
27//
28// TLSA records are looked up under "TLSA candidate base domains". The domain
29// where the TLSA records are found is the "TLSA base domain". If the host to
30// connect to is a CNAME that can be followed with DNSSEC protection, it is the
31// first TLSA candidate base domain. If no protected records are found, the
32// original host name is the second TLSA candidate base domain.
33//
34// For TLS connections, the TLSA base domain is used with SNI during the
35// handshake.
36//
37// For TLS certificate verification that requires PKIX/WebPKI/trusted-anchor
38// verification (all except DANE-EE), the potential second TLSA candidate base
39// domain name is also a valid hostname. With SMTP, additionally for hosts found in
40// MX records for a "next-hop domain", the "original next-hop domain" (domain of an
41// email address to deliver to) is also a valid name, as is the "CNAME-expanded
42// original next-hop domain", bringing the potential total allowed names to four
43// (if CNAMEs are followed for the MX hosts).
44package dane
45
46// todo: why is https://datatracker.ietf.org/doc/html/draft-barnes-dane-uks-00 not in use? sounds reasonable.
47// todo: add a DialSRV function that accepts a domain name, looks up srv records, dials the service, verifies dane certificate and returns the connection. for ../rfc/7673
48
49import (
50 "bytes"
51 "context"
52 "crypto/sha256"
53 "crypto/sha512"
54 "crypto/tls"
55 "crypto/x509"
56 "errors"
57 "fmt"
58 "log/slog"
59 "net"
60 "strings"
61 "time"
62
63 "github.com/mjl-/adns"
64
65 "github.com/mjl-/mox/dns"
66 "github.com/mjl-/mox/mlog"
67 "github.com/mjl-/mox/stub"
68 "slices"
69)
70
// Counters for DANE verification. Both default to stub.CounterIgnore{}
// (presumably a counter that discards increments; confirm in package stub) and
// can be replaced by the application with real metrics.
var (
	// MetricVerify has Inc called once for every call to Verify.
	MetricVerify stub.Counter = stub.CounterIgnore{}
	// MetricVerifyErrors has Inc called once for every call to Verify that does not
	// end with a verified record: when no records were passed, when no record
	// matched, or when errors were encountered while verifying.
	MetricVerifyErrors stub.Counter = stub.CounterIgnore{}
)
75
// Sentinel errors. They may be returned in wrapped form, so compare with
// errors.Is.
var (
	// ErrNoRecords means no TLSA records were found and host has not opted into DANE.
	// Dial returns it when the TLSA lookup reports "not found".
	ErrNoRecords = errors.New("dane: no tlsa records")

	// ErrInsecure indicates insecure DNS responses were encountered while looking up
	// the host, CNAME records, or TLSA records. Dial returns it when the first CNAME
	// lookup or the final TLSA lookup is not DNSSEC-authentic.
	ErrInsecure = errors.New("dane: dns lookups insecure")

	// ErrNoMatch means some TLSA records were found, but none can be verified against
	// the remote TLS certificate. Dial also returns it (wrapped) when none of the
	// records has an allowed usage.
	ErrNoMatch = errors.New("dane: no match between certificate and tlsa records")
)
88
// VerifyError is an error encountered while verifying a DANE TLSA record. For
// example, an error encountered with x509 certificate trusted-anchor verification.
// A TLSA record that does not match a TLS certificate is not a VerifyError.
//
// Verify creates one VerifyError for each record for which verification returned
// an error. Use errors.As to get at the record, and errors.Is/errors.As to inspect
// the underlying error through Unwrap.
type VerifyError struct {
	Err    error     // Underlying error, possibly from crypto/x509.
	Record adns.TLSA // Cause of error: the TLSA record that was being verified.
}
96
97// Error returns a string explaining this is a dane verify error along with the
98// underlying error.
99func (e VerifyError) Error() string {
100 return fmt.Sprintf("dane verify error: %s", e.Err)
101}
102
103// Unwrap returns the underlying error.
104func (e VerifyError) Unwrap() error {
105 return e.Err
106}
107
108// Dial looks up DNSSEC-protected DANE TLSA records for the domain name and
109// port/service in address, checks for allowed usages, makes a network connection
110// and verifies the remote certificate against the TLSA records. If verification
111// succeeds, the verified record is returned.
112//
113// Different protocols require different usages. For example, SMTP with STARTTLS
114// for delivery only allows usages DANE-TA and DANE-EE. If allowedUsages is
115// non-nil, only the specified usages are taken into account when verifying, and
116// any others ignored.
117//
118// Errors that can be returned, possibly in wrapped form:
119// - ErrNoRecords, also in case the DNS response indicates "not found".
120// - adns.DNSError, potentially wrapping adns.ExtendedError of which some can
121// indicate DNSSEC errors.
122// - ErrInsecure
123// - VerifyError, potentially wrapping errors from crypto/x509.
124func Dial(ctx context.Context, elog *slog.Logger, resolver dns.Resolver, network, address string, allowedUsages []adns.TLSAUsage, pkixRoots *x509.CertPool) (net.Conn, adns.TLSA, error) {
125 log := mlog.New("dane", elog)
126
127 // Split host and port.
128 host, portstr, err := net.SplitHostPort(address)
129 if err != nil {
130 return nil, adns.TLSA{}, fmt.Errorf("parsing address: %w", err)
131 }
132 port, err := resolver.LookupPort(ctx, network, portstr)
133 if err != nil {
134 return nil, adns.TLSA{}, fmt.Errorf("parsing port: %w", err)
135 }
136
137 hostDom, err := dns.ParseDomain(strings.TrimSuffix(host, "."))
138 if err != nil {
139 return nil, adns.TLSA{}, fmt.Errorf("parsing host: %w", err)
140 }
141
142 // ../rfc/7671:1015
143 // First follow CNAMEs for host. If the path to the final name is secure, we must
144 // lookup TLSA there first, then fallback to the original name. If the final name
145 // is secure that's also the SNI server name we must use, with the original name as
146 // allowed host during certificate name checks (for all TLSA usages other than
147 // DANE-EE).
148 cnameDom := hostDom
149 cnameAuthentic := true
150 for i := 0; ; i += 1 {
151 if i == 10 {
152 return nil, adns.TLSA{}, fmt.Errorf("too many cname lookups")
153 }
154 cname, cnameResult, err := resolver.LookupCNAME(ctx, cnameDom.ASCII+".")
155 cnameAuthentic = cnameAuthentic && cnameResult.Authentic
156 if !cnameResult.Authentic && i == 0 {
157 return nil, adns.TLSA{}, fmt.Errorf("%w: cname lookup insecure", ErrInsecure)
158 } else if dns.IsNotFound(err) {
159 break
160 } else if err != nil {
161 return nil, adns.TLSA{}, fmt.Errorf("resolving cname %s: %w", cnameDom, err)
162 } else if d, err := dns.ParseDomain(strings.TrimSuffix(cname, ".")); err != nil {
163 return nil, adns.TLSA{}, fmt.Errorf("parsing cname: %w", err)
164 } else {
165 cnameDom = d
166 }
167 }
168
169 // We lookup the IP.
170 ipnetwork := "ip"
171 if strings.HasSuffix(network, "4") {
172 ipnetwork += "4"
173 } else if strings.HasSuffix(network, "6") {
174 ipnetwork += "6"
175 }
176 ips, _, err := resolver.LookupIP(ctx, ipnetwork, cnameDom.ASCII+".")
177 // note: For SMTP with opportunistic DANE we would stop here with an insecure
178 // response. But as long as long as we have a verified original tlsa base name, we
179 // can continue with regular DANE.
180 if err != nil {
181 return nil, adns.TLSA{}, fmt.Errorf("resolving ips: %w", err)
182 } else if len(ips) == 0 {
183 return nil, adns.TLSA{}, &adns.DNSError{Err: "no ips for host", Name: cnameDom.ASCII, IsNotFound: true}
184 }
185
186 // Lookup TLSA records. If resolving CNAME was secure, we try that first. Otherwise
187 // we try at the secure original domain.
188 baseDom := hostDom
189 if cnameAuthentic {
190 baseDom = cnameDom
191 }
192 var records []adns.TLSA
193 var result adns.Result
194 for {
195 var err error
196 records, result, err = resolver.LookupTLSA(ctx, port, network, baseDom.ASCII+".")
197 // If no (secure) records can be found at the final cname, and there is an original
198 // name, try at original name.
199 // ../rfc/7671:1015
200 if baseDom != hostDom && (dns.IsNotFound(err) || !result.Authentic) {
201 baseDom = hostDom
202 continue
203 }
204 if !result.Authentic {
205 return nil, adns.TLSA{}, ErrInsecure
206 } else if dns.IsNotFound(err) {
207 return nil, adns.TLSA{}, ErrNoRecords
208 } else if err != nil {
209 return nil, adns.TLSA{}, fmt.Errorf("lookup dane tlsa records: %w", err)
210 }
211 break
212 }
213
214 // Keep only the allowed usages.
215 if allowedUsages != nil {
216 o := 0
217 for _, r := range records {
218 if slices.Contains(allowedUsages, r.Usage) {
219 records[o] = r
220 o++
221 }
222 }
223 records = records[:o]
224 if len(records) == 0 {
225 // No point in dialing when we know we won't be able to verify the remote TLS
226 // certificate.
227 return nil, adns.TLSA{}, fmt.Errorf("no usable tlsa records remaining: %w", ErrNoMatch)
228 }
229 }
230
231 // We use the base domain for SNI, allowing the original domain as well.
232 // ../rfc/7671:1021
233 var moreAllowedHosts []dns.Domain
234 if baseDom != hostDom {
235 moreAllowedHosts = []dns.Domain{hostDom}
236 }
237
238 // Dial the remote host.
239 timeout := 30 * time.Second
240 if deadline, ok := ctx.Deadline(); ok && len(ips) > 0 {
241 timeout = time.Until(deadline) / time.Duration(len(ips))
242 }
243 dialer := &net.Dialer{Timeout: timeout}
244 var conn net.Conn
245 var dialErrs []error
246 for _, ip := range ips {
247 addr := net.JoinHostPort(ip.String(), portstr)
248 c, err := dialer.DialContext(ctx, network, addr)
249 if err != nil {
250 dialErrs = append(dialErrs, err)
251 continue
252 }
253 conn = c
254 break
255 }
256 if conn == nil {
257 return nil, adns.TLSA{}, errors.Join(dialErrs...)
258 }
259
260 var verifiedRecord adns.TLSA
261 config := TLSClientConfig(log.Logger, records, baseDom, moreAllowedHosts, &verifiedRecord, pkixRoots)
262 tlsConn := tls.Client(conn, &config)
263 if err := tlsConn.HandshakeContext(ctx); err != nil {
264 xerr := conn.Close()
265 log.Check(xerr, "closing connection")
266 return nil, adns.TLSA{}, err
267 }
268 return tlsConn, verifiedRecord, nil
269}
270
271// TLSClientConfig returns a tls.Config to be used for dialing/handshaking a
272// TLS connection with DANE verification.
273//
274// Callers should only pass records that are allowed for the intended use. DANE
275// with SMTP only allows DANE-EE and DANE-TA usages, not the PKIX-usages.
276//
277// The config has InsecureSkipVerify set to true, with a custom VerifyConnection
278// function for verifying DANE. Its VerifyConnection can return ErrNoMatch and
279// additionally one or more (wrapped) errors of type VerifyError.
280//
281// The TLS config uses allowedHost for SNI.
282//
283// If verifiedRecord is not nil, it is set to the record that was successfully
284// verified, if any.
285func TLSClientConfig(elog *slog.Logger, records []adns.TLSA, allowedHost dns.Domain, moreAllowedHosts []dns.Domain, verifiedRecord *adns.TLSA, pkixRoots *x509.CertPool) tls.Config {
286 log := mlog.New("dane", elog)
287 return tls.Config{
288 ServerName: allowedHost.ASCII, // For SNI.
289 InsecureSkipVerify: true,
290 VerifyConnection: func(cs tls.ConnectionState) error {
291 verified, record, err := Verify(log.Logger, records, cs, allowedHost, moreAllowedHosts, pkixRoots)
292 log.Debugx("dane verification", err, slog.Bool("verified", verified), slog.Any("record", record))
293 if verified {
294 if verifiedRecord != nil {
295 *verifiedRecord = record
296 }
297 return nil
298 } else if err == nil {
299 return ErrNoMatch
300 }
301 return fmt.Errorf("%w, and error(s) encountered during verification: %w", ErrNoMatch, err)
302 },
303 MinVersion: tls.VersionTLS12, // ../rfc/8996:31 ../rfc/8997:66
304 }
305}
306
307// Verify checks if the TLS connection state can be verified against DANE TLSA
308// records.
309//
310// allowedHost along with the optional moreAllowedHosts are the host names that are
311// allowed during certificate verification (as used by PKIX-TA, PKIX-EE, DANE-TA,
312// but not DANE-EE). A typical connection would allow just one name, but some uses
313// of DANE allow multiple, like SMTP which allow up to four valid names for a TLS
314// certificate based on MX/CNAME/TLSA/DNSSEC lookup results.
315//
316// When one of the records matches, Verify returns true, along with the matching
317// record and a nil error.
318// If there is no match, then in the typical case Verify returns: false, a zero
319// record value and a nil error.
320// If an error is encountered while verifying a record, e.g. for x509
321// trusted-anchor verification, an error may be returned, typically one or more
322// (wrapped) errors of type VerifyError.
323//
324// Verify is useful when DANE verification and its results has to be done
325// separately from other validation, e.g. for MTA-STS. The caller can create a
326// tls.Config with a VerifyConnection function that checks DANE and MTA-STS
327// separately.
328func Verify(elog *slog.Logger, records []adns.TLSA, cs tls.ConnectionState, allowedHost dns.Domain, moreAllowedHosts []dns.Domain, pkixRoots *x509.CertPool) (verified bool, matching adns.TLSA, rerr error) {
329 log := mlog.New("dane", elog)
330 MetricVerify.Inc()
331 if len(records) == 0 {
332 MetricVerifyErrors.Inc()
333 return false, adns.TLSA{}, fmt.Errorf("verify requires at least one tlsa record")
334 }
335 var errs []error
336 for _, r := range records {
337 ok, err := verifySingle(log, r, cs, allowedHost, moreAllowedHosts, pkixRoots)
338 if err != nil {
339 errs = append(errs, VerifyError{err, r})
340 } else if ok {
341 return true, r, nil
342 }
343 }
344 MetricVerifyErrors.Inc()
345 return false, adns.TLSA{}, errors.Join(errs...)
346}
347
348// verifySingle verifies the TLS connection against a single DANE TLSA record.
349//
350// If the remote TLS certificate matches with the TLSA record, true is
351// returned. Errors may be encountered while verifying, e.g. when checking one
352// of the allowed hosts against a TLSA record. A typical non-matching/verified
353// TLSA record returns a nil error. But in some cases, e.g. when encountering
354// errors while verifying certificates against a trust-anchor, an error can be
355// returned with one or more underlying x509 verification errors. A nil-nil error
356// is only returned when verified is false.
357func verifySingle(log mlog.Log, tlsa adns.TLSA, cs tls.ConnectionState, allowedHost dns.Domain, moreAllowedHosts []dns.Domain, pkixRoots *x509.CertPool) (verified bool, rerr error) {
358 if len(cs.PeerCertificates) == 0 {
359 return false, fmt.Errorf("no server certificate")
360 }
361
362 match := func(cert *x509.Certificate) bool {
363 var buf []byte
364 switch tlsa.Selector {
365 case adns.TLSASelectorCert:
366 buf = cert.Raw
367 case adns.TLSASelectorSPKI:
368 buf = cert.RawSubjectPublicKeyInfo
369 default:
370 return false
371 }
372
373 switch tlsa.MatchType {
374 case adns.TLSAMatchTypeFull:
375 case adns.TLSAMatchTypeSHA256:
376 d := sha256.Sum256(buf)
377 buf = d[:]
378 case adns.TLSAMatchTypeSHA512:
379 d := sha512.Sum512(buf)
380 buf = d[:]
381 default:
382 return false
383 }
384
385 return bytes.Equal(buf, tlsa.CertAssoc)
386 }
387
388 pkixVerify := func(host dns.Domain) ([][]*x509.Certificate, error) {
389 // Default Verify checks for expiration. We pass the host name to check. And we
390 // configure the intermediates. The roots are filled in by the x509 package.
391 opts := x509.VerifyOptions{
392 DNSName: host.ASCII,
393 Intermediates: x509.NewCertPool(),
394 Roots: pkixRoots,
395 }
396 for _, cert := range cs.PeerCertificates[1:] {
397 opts.Intermediates.AddCert(cert)
398 }
399 chains, err := cs.PeerCertificates[0].Verify(opts)
400 return chains, err
401 }
402
403 switch tlsa.Usage {
404 case adns.TLSAUsagePKIXTA:
405 // We cannot get at the system trusted ca certificates to look for the trusted
406 // anchor. So we just ask Go to verify, then see if any of the chains include the
407 // ca certificate.
408 var errs []error
409 for _, host := range append([]dns.Domain{allowedHost}, moreAllowedHosts...) {
410 chains, err := pkixVerify(host)
411 log.Debugx("pkix-ta verify", err)
412 if err != nil {
413 errs = append(errs, err)
414 continue
415 }
416 // The chains by x509's Verify should include the longest possible match, so it is
417 // sure to include the trusted anchor. ../rfc/7671:835
418 for _, chain := range chains {
419 // If pkix verified, check if any of the certificates match.
420 for i := len(chain) - 1; i >= 0; i-- {
421 if match(chain[i]) {
422 return true, nil
423 }
424 }
425 }
426 }
427 return false, errors.Join(errs...)
428
429 case adns.TLSAUsagePKIXEE:
430 // Check for a certificate match.
431 if !match(cs.PeerCertificates[0]) {
432 return false, nil
433 }
434 // And do regular pkix checks, ../rfc/7671:799
435 var errs []error
436 for _, host := range append([]dns.Domain{allowedHost}, moreAllowedHosts...) {
437 _, err := pkixVerify(host)
438 log.Debugx("pkix-ee verify", err)
439 if err == nil {
440 return true, nil
441 }
442 errs = append(errs, err)
443 }
444 return false, errors.Join(errs...)
445
446 case adns.TLSAUsageDANETA:
447 // We set roots, so the system defaults don't get used. Verify checks the host name
448 // (set below) and checks for expiration.
449 opts := x509.VerifyOptions{
450 Intermediates: x509.NewCertPool(),
451 Roots: x509.NewCertPool(),
452 }
453
454 // If the full certificate was included, we must add it to the valid roots, the TLS
455 // server may not send it. ../rfc/7671:692
456 var found bool
457 if tlsa.Selector == adns.TLSASelectorCert && tlsa.MatchType == adns.TLSAMatchTypeFull {
458 cert, err := x509.ParseCertificate(tlsa.CertAssoc)
459 if err != nil {
460 log.Debugx("parsing full exact certificate from tlsa record to use as root for usage dane-trusted-anchor", err)
461 // Continue anyway, perhaps the servers sends it again in a way that the tls package can parse? (unlikely)
462 } else {
463 opts.Roots.AddCert(cert)
464 found = true
465 }
466 }
467
468 for i, cert := range cs.PeerCertificates {
469 if match(cert) {
470 opts.Roots.AddCert(cert)
471 found = true
472 break
473 } else if i > 0 {
474 opts.Intermediates.AddCert(cert)
475 }
476 }
477 if !found {
478 // Trusted anchor was not found in TLS certificates so we won't be able to
479 // verify.
480 return false, nil
481 }
482
483 // Trusted anchor was found, still need to verify.
484 var errs []error
485 for _, host := range append([]dns.Domain{allowedHost}, moreAllowedHosts...) {
486 opts.DNSName = host.ASCII
487 _, err := cs.PeerCertificates[0].Verify(opts)
488 if err == nil {
489 return true, nil
490 }
491 errs = append(errs, err)
492 }
493 return false, errors.Join(errs...)
494
495 case adns.TLSAUsageDANEEE:
496 // ../rfc/7250 is about raw public keys instead of x.509 certificates in tls
497 // handshakes. Go's crypto/tls does not implement the extension (see
498 // crypto/tls/common.go, the extensions values don't appear in the
499 // rfc, but have values 19 and 20 according to
500 // https://www.iana.org/assignments/tls-extensiontype-values/tls-extensiontype-values.xhtml#tls-extensiontype-values-1
501 // ../rfc/7671:1148 mentions the raw public keys are allowed. It's still
502 // questionable that this is commonly implemented. For now the world can probably
503 // live with an ignored certificate wrapped around the subject public key info.
504
505 // We don't verify host name in certificate, ../rfc/7671:489
506 // And we don't check for expiration. ../rfc/7671:527
507 // The whole point of this type is to have simple secure infrastructure that
508 // doesn't automatically expire (at the most inconvenient times).
509 return match(cs.PeerCertificates[0]), nil
510
511 default:
512 // Unknown, perhaps defined in the future. Not an error.
513 log.Debug("unrecognized tlsa usage, skipping", slog.Any("tlsausage", tlsa.Usage))
514 return false, nil
515 }
516}
517