1// Package tlsrptsend sends TLS reports based on success/failure statistics and
2// details gathered while making SMTP STARTTLS connections for delivery. See RFC
6// tlsrptsend is a separate package instead of being in tlsrptdb because it imports
7// queue and queue imports tlsrptdb to store tls results, so that would cause a
10// Sending TLS reports and DMARC reports is very similar. See ../dmarcdb/eval.go:/similar and ../tlsrptsend/send.go:/similar.
12// todo spec:
../rfc/8460:441 ../rfc/8460:463 may lead reader to believe they can find a DANE or MTA-STS policy at the same place, while in practice you'll get an MTA-STS policy at a recipient domain and a DANE policy at a mail host, and that's where the TLSRPT policy is defined. it would have helped with this implementation if the distinction was mentioned explicitly, also earlier in the document (i realized it late in the implementation process based on the terminology entry for the policy domain). examples with a tlsrpt record at a mail host would have helped too.
13// todo spec:
../rfc/8460:1017 example report message misses the required DKIM signature.
32 "golang.org/x/exp/slices"
33 "golang.org/x/exp/slog"
35 "github.com/prometheus/client_golang/prometheus"
36 "github.com/prometheus/client_golang/prometheus/promauto"
38 "github.com/mjl-/bstore"
40 "github.com/mjl-/mox/config"
41 "github.com/mjl-/mox/dkim"
42 "github.com/mjl-/mox/dns"
43 "github.com/mjl-/mox/message"
44 "github.com/mjl-/mox/metrics"
45 "github.com/mjl-/mox/mlog"
46 "github.com/mjl-/mox/mox-"
47 "github.com/mjl-/mox/moxio"
48 "github.com/mjl-/mox/moxvar"
49 "github.com/mjl-/mox/queue"
50 "github.com/mjl-/mox/smtp"
51 "github.com/mjl-/mox/store"
52 "github.com/mjl-/mox/tlsrpt"
53 "github.com/mjl-/mox/tlsrptdb"
57 metricReport = promauto.NewCounter(
58 prometheus.CounterOpts{
59 Name: "mox_tlsrptsend_report_queued_total",
60 Help: "Total messages with TLS reports queued.",
63 metricReportError = promauto.NewCounter(
64 prometheus.CounterOpts{
65 Name: "mox_tlsrptsend_report_error_total",
66 Help: "Total errors while composing or queueing TLS reports.",
71var jitterRand = mox.NewPseudoRand()
73// time to sleep until sending reports at midnight t, replaced by tests.
74// Jitter so we don't cause load at exactly midnight, other processes may
75// already be doing that.
76var jitteredTimeUntil = func(t time.Time) time.Duration {
77 return time.Until(t.Add(time.Duration(240+jitterRand.Intn(120)) * time.Second))
80// Start launches a goroutine that wakes up just after 00:00 UTC to send TLSRPT
81// reports. Reports are sent spread out over a 4 hour period.
82func Start(resolver dns.Resolver) {
84 log := mlog.New("tlsrptsend", nil)
87 // In case of panic don't take the whole program down.
90 log.Error("recover from panic", slog.Any("panic", x))
92 metrics.PanicInc(metrics.Tlsrptdb)
96 timer := time.NewTimer(time.Hour) // Reset below.
101 db := tlsrptdb.ResultDB
103 log.Error("no tlsrpt results database for tls reports, not sending reports")
107 // We start sending for previous day, if there are any reports left.
108 endUTC := midnightUTC(time.Now())
111 dayUTC := endUTC.Add(-12 * time.Hour).Format("20060102")
113 // Remove evaluations older than 48 hours (2 reports with 24 hour interval).
114 // They should have been processed by now. We may have kept them
115 // during temporary errors, but persistent temporary errors shouldn't fill up our
116 // database and we don't want to send old reports either.
117 _, err := bstore.QueryDB[tlsrptdb.TLSResult](ctx, db).FilterLess("DayUTC", endUTC.Add((-48-12)*time.Hour).Format("20060102")).Delete()
118 log.Check(err, "removing stale tls results from database")
120 clog := log.WithCid(mox.Cid())
121 clog.Info("sending tls reports", slog.String("day", dayUTC))
122 if err := sendReports(ctx, clog, resolver, db, dayUTC, endUTC); err != nil {
123 clog.Errorx("sending tls reports", err)
124 metricReportError.Inc()
126 clog.Info("finished sending tls reports")
129 endUTC = endUTC.Add(24 * time.Hour)
130 timer.Reset(jitteredTimeUntil(endUTC))
134 log.Info("tls report sender shutting down")
142func midnightUTC(now time.Time) time.Time {
144 return time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location())
147// Sleep in between sending two reports.
149var sleepBetween = func(ctx context.Context, between time.Duration) (ok bool) {
150 t := time.NewTimer(between)
160// sendReports gathers all policy domains that have results that should receive a
161// TLS report and sends a report to each if their TLSRPT DNS record has reporting
163func sendReports(ctx context.Context, log mlog.Log, resolver dns.Resolver, db *bstore.DB, dayUTC string, endTimeUTC time.Time) error {
169 // Gather all policy domains we plan to send to.
170 rcptDoms := map[key]bool{} // Results where recipient domain is equal to policy domain, regardless of IsHost.
171 nonRcptDoms := map[key]bool{} // MX domains (without those that are also recipient domains).
173 q := bstore.QueryDB[tlsrptdb.TLSResult](ctx, db)
174 q.FilterLessEqual("DayUTC", dayUTC)
175 err := q.ForEach(func(e tlsrptdb.TLSResult) error {
177 if e.PolicyDomain != e.RecipientDomain {
180 k := key{e.PolicyDomain, e.DayUTC}
181 if e.SendReport && !doms[k] {
184 doms[k] = doms[k] || e.SendReport
188 return fmt.Errorf("looking for domains to send tls reports to: %v", err)
191 // Stretch sending reports over max 4 hours, but only if there are quite a few
193 between := 4 * time.Hour
195 between = between / time.Duration(nsend)
197 if between > 5*time.Minute {
198 between = 5 * time.Minute
201 var wg sync.WaitGroup
205 remove := map[key]struct{}{}
206 var removeMutex sync.Mutex
208 sendDomains := func(isRcptDom bool, doms map[key]bool) {
209 for k, send := range doms {
212 remove[k] = struct{}{}
218 ok := sleepBetween(ctx, between)
225 // In goroutine, so our timing stays independent of how fast we process.
229 // In case of panic don't take the whole program down.
232 log.Error("unhandled panic in tlsrptsend sendReports", slog.Any("panic", x))
234 metrics.PanicInc(metrics.Tlsrptdb)
239 rlog := log.WithCid(mox.Cid()).With(slog.String("policydomain", k.policyDomain),
240 slog.String("daytutc", k.dayUTC),
241 slog.Bool("isrcptdom", isRcptDom))
242 rlog.Info("looking to send tls report for domain")
243 cleanup, err := sendReportDomain(ctx, rlog, resolver, db, endTimeUTC, isRcptDom, k.policyDomain, k.dayUTC)
245 rlog.Errorx("sending tls report to domain", err)
246 metricReportError.Inc()
250 defer removeMutex.Unlock()
251 remove[k] = struct{}{}
257 // We send to recipient domains first. That will store the reporting addresses for
258 // the recipient domains, which are used when sending to nonRcptDoms to potentially
259 // skip sending a duplicate report.
260 sendDomains(true, rcptDoms)
262 sendDomains(false, nonRcptDoms)
265 // Remove all records that have been processed.
266 err = db.Write(ctx, func(tx *bstore.Tx) error {
267 for k := range remove {
268 q := bstore.QueryTx[tlsrptdb.TLSResult](tx)
269 q.FilterNonzero(tlsrptdb.TLSResult{PolicyDomain: k.policyDomain, DayUTC: k.dayUTC})
277 log.Check(err, "cleaning up tls results in database")
282// replaceable for testing.
283var queueAdd = queue.Add
285func sendReportDomain(ctx context.Context, log mlog.Log, resolver dns.Resolver, db *bstore.DB, endUTC time.Time, isRcptDom bool, policyDomain, dayUTC string) (cleanup bool, rerr error) {
286 polDom, err := dns.ParseDomain(policyDomain)
288 return false, fmt.Errorf("parsing policy domain for sending tls reports: %v", err)
291 // Reports need to be DKIM-signed by the submitter domain. Lookup the DKIM
292 // configuration now. If we don't have any, there is no point sending reports.
293 // todo spec:
../rfc/8460:322 "reporting domain" is a bit ambiguous. submitter domain is used in other places. it may be helpful in practice to allow dmarc-relaxed-like matching of the signing domain, so an address postmaster at mail host can send the reports using dkim keys at a higher-up domain (e.g. the publicsuffix domain).
294 fromDom := mox.Conf.Static.HostnameDomain
295 var confDKIM config.DKIM
297 confDom, ok := mox.Conf.Domain(fromDom)
298 if len(confDom.DKIM.Sign) > 0 {
299 confDKIM = confDom.DKIM
302 return true, fmt.Errorf("domain for mail host does not have dkim signing configured, report message cannot be dkim-signed")
305 // Remove least significant label.
307 _, nfd.ASCII, _ = strings.Cut(fromDom.ASCII, ".")
308 _, nfd.Unicode, _ = strings.Cut(fromDom.Unicode, ".")
311 var zerodom dns.Domain
312 if fromDom == zerodom {
313 return true, fmt.Errorf("no configured domain for mail host found, report message cannot be dkim-signed")
317 // We'll cleanup records by default.
319 // But if we encounter a temporary error, we cancel the cleanup of evaluations.
323 if !cleanup || tempError {
325 log.Debug("not cleaning up results after attempting to send tls report")
329 // Get TLSRPT record. If there are no reporting addresses, we're not going to send at all.
330 record, _, err := tlsrpt.Lookup(ctx, log.Logger, resolver, polDom)
332 // If there is no TLSRPT record, that's fine, we'll remove what we tracked.
333 if errors.Is(err, tlsrpt.ErrNoRecord) {
336 cleanup = errors.Is(err, tlsrpt.ErrDNS)
337 return cleanup, fmt.Errorf("looking up current tlsrpt record for reporting addresses: %v", err)
340 var recipients []message.NameAddress
341 var recipientStrs []string
343 for _, l := range record.RUAs {
344 for _, s := range l {
345 u, err := url.Parse(string(s))
347 log.Debugx("parsing rua uri in tlsrpt dns record, ignoring", err, slog.Any("rua", s))
351 if u.Scheme == "mailto" {
352 addr, err := smtp.ParseAddress(u.Opaque)
354 log.Debugx("parsing mailto uri in tlsrpt record rua value, ignoring", err, slog.Any("rua", s))
357 recipients = append(recipients, message.NameAddress{Address: addr})
358 recipientStrs = append(recipientStrs, string(s))
359 } else if u.Scheme == "https" {
360 // Although "report" is ambiguous and could mean both only the JSON data or an
361 // entire message (including DKIM-Signature) with the JSON data, it appears the
362 // intention of the RFC is that the HTTPS transport sends only the JSON data, given
363 // mention of the media type to use (for the HTTP POST). It is the type of the
364 // report, not of a message. TLS reports sent over email must have a DKIM
365 // signature, i.e. must be authenticated, for understandable reasons. No such
366 // requirement is specified for HTTPS, but no one is going to accept
367 // unauthenticated TLS reports over HTTPS. So there seems little point in sending
370 // todo spec: would be good to have clearer distinction between "report" (JSON) and "report message" (message with report attachment, that can be DKIM signed). propose sending report message over https that includes DKIM signature so authenticity can be verified and the report used.
../rfc/8460:310
371 log.Debug("https scheme in rua uri in tlsrpt record, ignoring since they will likey not be used to due lack of authentication", slog.Any("rua", s))
373 log.Debug("unknown scheme in rua uri in tlsrpt record, ignoring", slog.Any("rua", s))
378 if len(recipients) == 0 {
379 // No reports requested, perfectly fine, no work to do for us.
380 log.Debug("no tlsrpt reporting addresses configured")
384 q := bstore.QueryDB[tlsrptdb.TLSResult](ctx, db)
386 q.FilterNonzero(tlsrptdb.TLSResult{RecipientDomain: policyDomain, DayUTC: dayUTC})
388 q.FilterNonzero(tlsrptdb.TLSResult{PolicyDomain: policyDomain, DayUTC: dayUTC})
390 tlsResults, err := q.List()
392 return true, fmt.Errorf("get tls results from database: %v", err)
395 if len(tlsResults) == 0 {
396 // Should not happen. But no point in sending messages with empty reports.
397 return true, fmt.Errorf("no tls results found")
400 // Stop if we already sent a report for this destination.
401 for _, r := range tlsResults {
402 if r.PolicyDomain == r.RecipientDomain && (isRcptDom && r.SentToRecipientDomain || !isRcptDom && r.SentToPolicyDomain) {
407 beginUTC := endUTC.Add(-24 * time.Hour)
409 report := tlsrpt.Report{
410 OrganizationName: fromDom.ASCII,
411 DateRange: tlsrpt.TLSRPTDateRange{
415 ContactInfo: "postmaster@" + fromDom.ASCII,
416 // todo spec:
../rfc/8460:968 ../rfc/8460:1772 ../rfc/8460:691 subject header assumes a report-id in the form of a msg-id, but example and report-id json field explanation allows free-form report-id's (assuming we're talking about the same report-id here).
417 ReportID: endUTC.Add(-12*time.Hour).Format("20060102") + "." + polDom.ASCII + "@" + fromDom.ASCII,
420 rcptDomAddresses := map[string][]string{}
421 for _, tlsResult := range tlsResults {
422 rcptDomAddresses[tlsResult.RecipientDomain] = tlsResult.RecipientDomainReportingAddresses
425 // Merge all results into this report.
426 // If we are sending to a recipient domain, we include all relevant policy domains,
427 // so possibly multiple MX hosts (with DANE policies). That means we may be sending
428 // multiple "no-policy-found" results (1 for sts and 0 or more for mx hosts). An
429 // explicit no-sts or no-tlsa would make these less ambiguous, but the
430 // policy-domain values will make clear which is the MX and which is the recipient
431 // domain. Only for recipient domains with an MX target equal to the recipient host
432 // could it be confusing.
433 // If we are sending to MX targets (that aren't recipient domains), we mention the
434 // affected recipient domains as policy-domain while keeping the original policy
435 // domain (MX target) in the "mx-host" field. This behaviour isn't in the RFC, but
436 // seems useful to give MX operators insight into the recipient domains affected.
437 // We also won't include results for a recipient domain if its TLSRPT policy has
438 // the same reporting addresses as the MX target TLSRPT policy.
439 for i, tlsResult := range tlsResults {
441 if slices.Equal(rcptDomAddresses[tlsResult.RecipientDomain], recipientStrs) {
444 for j, r := range tlsResult.Results {
445 if tlsResult.IsHost {
446 tlsResults[i].Results[j].Policy.MXHost = []string{r.Policy.Domain}
448 tlsResults[i].Results[j].Policy.Domain = tlsResult.RecipientDomain
452 report.Merge(tlsResult.Results...)
455 // We may not have any results left, i.e. when this is an MX target and we already
456 // sent all results in the report to the recipient domain with identical reporting
458 if len(report.Policies) == 0 {
462 if !mox.Conf.Static.OutgoingTLSReportsForAllSuccess {
464 // Check there is at least one failure. If not, we don't send a report.
465 for _, r := range report.Policies {
466 if r.Summary.TotalFailureSessionCount > 0 || len(r.FailureDetails) > 0 {
476 log.Info("sending tls report")
478 reportFile, err := store.CreateMessageTemp(log, "tlsreportout")
480 return false, fmt.Errorf("creating temporary file for outgoing tls report: %v", err)
482 defer store.CloseRemoveTempFile(log, reportFile, "generated tls report")
485 gzw := gzip.NewWriter(reportFile)
486 enc := json.NewEncoder(gzw)
487 enc.SetIndent("", "\t")
489 err = enc.Encode(report.Convert())
495 return false, fmt.Errorf("writing tls report as json with gzip: %v", err)
498 msgf, err := store.CreateMessageTemp(log, "tlsreportmsgout")
500 return false, fmt.Errorf("creating temporary message file with outgoing tls report: %v", err)
502 defer store.CloseRemoveTempFile(log, msgf, "message with generated tls report")
504 // We are sending reports from our host's postmaster address. In a
505 // typical setup the host is a subdomain of a configured domain with
506 // DKIM keys, so we can DKIM-sign our reports. SPF should pass anyway.
507 // todo future: when sending, use an SMTP MAIL FROM that we can relate back to recipient reporting address so we can stop trying to send reports in case of repeated delivery failure DSNs.
508 from := smtp.Address{Localpart: "postmaster", Domain: fromDom}
511 subject := fmt.Sprintf("Report Domain: %s Submitter: %s Report-ID: <%s>", polDom.ASCII, fromDom, report.ReportID)
514 text := fmt.Sprintf(`Attached is a TLS report with a summary of connection successes and failures
515during attempts to securely deliver messages to your mail server, including
516details about errors encountered. You are receiving this message because your
517address is specified in the "rua" field of the TLSRPT record for your
524`, polDom, fromDom, report.ReportID, beginUTC.Format(time.DateTime), endUTC.Format(time.DateTime))
527 reportFilename := fmt.Sprintf("%s!%s!%d!%d.json.gz", fromDom.ASCII, polDom.ASCII, beginUTC.Unix(), endUTC.Add(-time.Second).Unix())
529 // Compose the message.
530 msgPrefix, has8bit, smtputf8, messageID, err := composeMessage(ctx, log, msgf, polDom, confDKIM, from, recipients, subject, text, reportFilename, reportFile)
532 return false, fmt.Errorf("composing message with outgoing tls report: %v", err)
534 msgInfo, err := msgf.Stat()
536 return false, fmt.Errorf("stat message with outgoing tls report: %v", err)
538 msgSize := int64(len(msgPrefix)) + msgInfo.Size()
540 // Already mark the report as sent. If it won't succeed below, it probably won't
541 // succeed on a later retry either. And if we would fail to mark a report as sent
542 // after sending it, we may send duplicates or even get in some kind of sending
544 err = db.Write(ctx, func(tx *bstore.Tx) error {
546 q := bstore.QueryTx[tlsrptdb.TLSResult](tx)
547 q.FilterNonzero(tlsrptdb.TLSResult{DayUTC: dayUTC, RecipientDomain: policyDomain})
548 _, err := q.UpdateNonzero(tlsrptdb.TLSResult{SentToRecipientDomain: true})
550 return fmt.Errorf("already marking tls results as sent for recipient domain: %v", err)
553 // Also set reporting addresses for the recipient domain results.
554 q = bstore.QueryTx[tlsrptdb.TLSResult](tx)
555 q.FilterNonzero(tlsrptdb.TLSResult{DayUTC: dayUTC, RecipientDomain: policyDomain})
556 _, err = q.UpdateNonzero(tlsrptdb.TLSResult{RecipientDomainReportingAddresses: recipientStrs})
558 return fmt.Errorf("storing recipient domain reporting addresses: %v", err)
561 q := bstore.QueryTx[tlsrptdb.TLSResult](tx)
562 q.FilterNonzero(tlsrptdb.TLSResult{DayUTC: dayUTC, PolicyDomain: policyDomain})
563 _, err := q.UpdateNonzero(tlsrptdb.TLSResult{SentToPolicyDomain: true})
565 return fmt.Errorf("already marking tls results as sent for policy domain: %v", err)
571 return false, fmt.Errorf("marking tls results as sent: %v", err)
575 for _, rcpt := range recipients {
576 // If recipient is on suppression list, we won't queue the reporting message.
577 q := bstore.QueryDB[tlsrptdb.TLSRPTSuppressAddress](ctx, db)
578 q.FilterNonzero(tlsrptdb.TLSRPTSuppressAddress{ReportingAddress: rcpt.Address.Path().String()})
579 q.FilterGreater("Until", time.Now())
580 exists, err := q.Exists()
582 return false, fmt.Errorf("querying suppress list: %v", err)
585 log.Info("suppressing outgoing tls report", slog.Any("reportingaddress", rcpt.Address))
589 qm := queue.MakeMsg(mox.Conf.Static.Postmaster.Account, from.Path(), rcpt.Address.Path(), has8bit, smtputf8, msgSize, messageID, []byte(msgPrefix), nil)
590 // Don't try as long as regular deliveries, and stop before we would send the
591 // delayed DSN. Though we also won't send that due to IsTLSReport.
594 qm.IsTLSReport = true
599 err = queueAdd(ctx, log, &qm, msgf)
602 log.Errorx("queueing message with tls report", err)
603 metricReportError.Inc()
607 log.Debug("tls report queued", slog.Any("recipient", rcpt))
612 // Regardless of whether we queued a report, we are not going to keep the
613 // evaluations around. Though this can be overridden if tempError is set.
619func composeMessage(ctx context.Context, log mlog.Log, mf *os.File, policyDomain dns.Domain, confDKIM config.DKIM, fromAddr smtp.Address, recipients []message.NameAddress, subject, text, filename string, reportFile *os.File) (msgPrefix string, has8bit, smtputf8 bool, messageID string, rerr error) {
620 xc := message.NewComposer(mf, 100*1024*1024)
626 if err, ok := x.(error); ok && errors.Is(err, message.ErrCompose) {
633 // We only use smtputf8 if we have to, with a utf-8 localpart. For IDNA, we use ASCII domains.
634 for _, a := range recipients {
635 if a.Address.Localpart.IsInternational() {
641 xc.HeaderAddrs("From", []message.NameAddress{{Address: fromAddr}})
642 xc.HeaderAddrs("To", recipients)
645 xc.Header("TLS-Report-Domain", policyDomain.ASCII)
646 xc.Header("TLS-Report-Submitter", fromAddr.Domain.ASCII)
648 xc.Header("TLS-Required", "No")
649 messageID = fmt.Sprintf("<%s>", mox.MessageIDGen(xc.SMTPUTF8))
650 xc.Header("Message-Id", messageID)
651 xc.Header("Date", time.Now().Format(message.RFC5322Z))
652 xc.Header("User-Agent", "mox/"+moxvar.Version)
653 xc.Header("MIME-Version", "1.0")
655 // Multipart message, with a text/plain and the report attached.
656 mp := multipart.NewWriter(xc)
658 xc.Header("Content-Type", fmt.Sprintf(`multipart/report; report-type="tlsrpt"; boundary="%s"`, mp.Boundary()))
661 // Textual part, just mentioning this is a TLS report.
662 textBody, ct, cte := xc.TextPart(text)
663 textHdr := textproto.MIMEHeader{}
664 textHdr.Set("Content-Type", ct)
665 textHdr.Set("Content-Transfer-Encoding", cte)
666 textp, err := mp.CreatePart(textHdr)
667 xc.Checkf(err, "adding text part to message")
668 _, err = textp.Write(textBody)
669 xc.Checkf(err, "writing text part")
671 // TLS report as attachment.
672 ahdr := textproto.MIMEHeader{}
673 ct = mime.FormatMediaType("application/tlsrpt+gzip", map[string]string{"name": filename})
674 ahdr.Set("Content-Type", ct)
675 cd := mime.FormatMediaType("attachment", map[string]string{"filename": filename})
676 ahdr.Set("Content-Disposition", cd)
677 ahdr.Set("Content-Transfer-Encoding", "base64")
678 ap, err := mp.CreatePart(ahdr)
679 xc.Checkf(err, "adding tls report to message")
680 wc := moxio.Base64Writer(ap)
681 _, err = io.Copy(wc, &moxio.AtReader{R: reportFile})
682 xc.Checkf(err, "adding attachment")
684 xc.Checkf(err, "flushing attachment")
687 xc.Checkf(err, "closing multipart")
691 selectors := mox.DKIMSelectors(confDKIM)
692 for i, sel := range selectors {
694 sel.Headers = append(append([]string{}, sel.Headers...), "TLS-Report-Domain", "TLS-Report-Submitter")
698 dkimHeader, err := dkim.Sign(ctx, log.Logger, fromAddr.Localpart, fromAddr.Domain, selectors, smtputf8, mf)
699 xc.Checkf(err, "dkim-signing report message")
701 return dkimHeader, xc.Has8bit, xc.SMTPUTF8, messageID, nil