1// Package tlsrptsend sends TLS reports based on success/failure statistics and
2// details gathered while making SMTP STARTTLS connections for delivery. See RFC
6// tlsrptsend is a separate package instead of being in tlsrptdb because it imports
7// queue and queue imports tlsrptdb to store tls results, so that would cause a
10// Sending TLS reports and DMARC reports is very similar. See ../dmarcdb/eval.go:/similar and ../tlsrptsend/send.go:/similar.
12// todo spec:
../rfc/8460:441 ../rfc/8460:463 may lead reader to believe they can find a DANE or MTA-STS policy at the same place, while in practice you'll get an MTA-STS policy at a recipient domain and a DANE policy at a mail host, and that's where the TLSRPT policy is defined. it would have helped with this implementation if the distinction was mentioned explicitly, also earlier in the document (i realized it late in the implementation process based on the terminology entry for the policy domain). examples with a tlsrpt record at a mail host would have helped too.
13// todo spec:
../rfc/8460:1017 example report message misses the required DKIM signature.
34 "github.com/prometheus/client_golang/prometheus"
35 "github.com/prometheus/client_golang/prometheus/promauto"
37 "github.com/mjl-/bstore"
39 "github.com/mjl-/mox/config"
40 "github.com/mjl-/mox/dkim"
41 "github.com/mjl-/mox/dns"
42 "github.com/mjl-/mox/message"
43 "github.com/mjl-/mox/metrics"
44 "github.com/mjl-/mox/mlog"
45 "github.com/mjl-/mox/mox-"
46 "github.com/mjl-/mox/moxio"
47 "github.com/mjl-/mox/moxvar"
48 "github.com/mjl-/mox/queue"
49 "github.com/mjl-/mox/smtp"
50 "github.com/mjl-/mox/store"
51 "github.com/mjl-/mox/tlsrpt"
52 "github.com/mjl-/mox/tlsrptdb"
56 metricReport = promauto.NewCounter(
57 prometheus.CounterOpts{
58 Name: "mox_tlsrptsend_report_queued_total",
59 Help: "Total messages with TLS reports queued.",
62 metricReportError = promauto.NewCounter(
63 prometheus.CounterOpts{
64 Name: "mox_tlsrptsend_report_error_total",
65 Help: "Total errors while composing or queueing TLS reports.",
70var jitterRand = mox.NewPseudoRand()
72// time to sleep until sending reports at midnight t, replaced by tests.
73// Jitter so we don't cause load at exactly midnight, other processes may
74// already be doing that.
75var jitteredTimeUntil = func(t time.Time) time.Duration {
76 return time.Until(t.Add(time.Duration(240+jitterRand.IntN(120)) * time.Second))
79// Start launches a goroutine that wakes up just after 00:00 UTC to send TLSRPT
80// reports. Reports are sent spread out over a 4 hour period.
81func Start(resolver dns.Resolver) {
83 log := mlog.New("tlsrptsend", nil)
86 // In case of panic don't take the whole program down.
89 log.Error("recover from panic", slog.Any("panic", x))
91 metrics.PanicInc(metrics.Tlsrptdb)
95 timer := time.NewTimer(time.Hour) // Reset below.
100 db := tlsrptdb.ResultDB
102 log.Error("no tlsrpt results database for tls reports, not sending reports")
106 // We start sending for previous day, if there are any reports left.
107 endUTC := midnightUTC(time.Now())
110 dayUTC := endUTC.Add(-12 * time.Hour).Format("20060102")
112 // Remove evaluations older than 48 hours (2 reports with 24 hour interval)
113 // They should have been processed by now. We may have kept them
114 // during temporary errors, but persistent temporary errors shouldn't fill up our
115 // database and we don't want to send old reports either.
116 _, err := bstore.QueryDB[tlsrptdb.TLSResult](ctx, db).FilterLess("DayUTC", endUTC.Add((-48-12)*time.Hour).Format("20060102")).Delete()
117 log.Check(err, "removing stale tls results from database")
119 clog := log.WithCid(mox.Cid())
120 clog.Info("sending tls reports", slog.String("day", dayUTC))
121 if err := sendReports(ctx, clog, resolver, db, dayUTC, endUTC); err != nil {
122 clog.Errorx("sending tls reports", err)
123 metricReportError.Inc()
125 clog.Info("finished sending tls reports")
128 endUTC = endUTC.Add(24 * time.Hour)
129 timer.Reset(jitteredTimeUntil(endUTC))
133 log.Info("tls report sender shutting down")
141func midnightUTC(now time.Time) time.Time {
143 return time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location())
146// Sleep in between sending two reports.
148var sleepBetween = func(ctx context.Context, between time.Duration) (ok bool) {
149 t := time.NewTimer(between)
159// sendReports gathers all policy domains that have results that should receive a
160// TLS report and sends a report to each if their TLSRPT DNS record has reporting
162func sendReports(ctx context.Context, log mlog.Log, resolver dns.Resolver, db *bstore.DB, dayUTC string, endTimeUTC time.Time) error {
168 // Gather all policy domains we plan to send to.
169 rcptDoms := map[key]bool{} // Results where recipient domain is equal to policy domain, regardless of IsHost.
170 nonRcptDoms := map[key]bool{} // MX domains (without those that are also recipient domains).
172 q := bstore.QueryDB[tlsrptdb.TLSResult](ctx, db)
173 q.FilterLessEqual("DayUTC", dayUTC)
174 err := q.ForEach(func(e tlsrptdb.TLSResult) error {
176 if e.PolicyDomain != e.RecipientDomain {
179 k := key{e.PolicyDomain, e.DayUTC}
180 if e.SendReport && !doms[k] {
183 doms[k] = doms[k] || e.SendReport
187 return fmt.Errorf("looking for domains to send tls reports to: %v", err)
190 // Stretch sending reports over max 4 hours, but only if there are quite a few
192 between := 4 * time.Hour
194 between = between / time.Duration(nsend)
196 if between > 5*time.Minute {
197 between = 5 * time.Minute
200 var wg sync.WaitGroup
204 remove := map[key]struct{}{}
205 var removeMutex sync.Mutex
207 sendDomains := func(isRcptDom bool, doms map[key]bool) {
208 for k, send := range doms {
211 remove[k] = struct{}{}
217 ok := sleepBetween(ctx, between)
224 // In goroutine, so our timing stays independent of how fast we process.
228 // In case of panic don't take the whole program down.
231 log.Error("unhandled panic in tlsrptsend sendReports", slog.Any("panic", x))
233 metrics.PanicInc(metrics.Tlsrptdb)
238 rlog := log.WithCid(mox.Cid()).With(slog.String("policydomain", k.policyDomain),
239 slog.String("daytutc", k.dayUTC),
240 slog.Bool("isrcptdom", isRcptDom))
241 rlog.Info("looking to send tls report for domain")
242 cleanup, err := sendReportDomain(ctx, rlog, resolver, db, endTimeUTC, isRcptDom, k.policyDomain, k.dayUTC)
244 rlog.Errorx("sending tls report to domain", err)
245 metricReportError.Inc()
249 defer removeMutex.Unlock()
250 remove[k] = struct{}{}
256 // We send to recipient domains first. That will store the reporting addresses for
257 // the recipient domains, which are used when sending to nonRcptDoms to potentially
258 // skip sending a duplicate report.
259 sendDomains(true, rcptDoms)
261 sendDomains(false, nonRcptDoms)
264 // Remove all records that have been processed.
265 err = db.Write(ctx, func(tx *bstore.Tx) error {
266 for k := range remove {
267 q := bstore.QueryTx[tlsrptdb.TLSResult](tx)
268 q.FilterNonzero(tlsrptdb.TLSResult{PolicyDomain: k.policyDomain, DayUTC: k.dayUTC})
276 log.Check(err, "cleaning up tls results in database")
281// replaceable for testing.
282var queueAdd = queue.Add
284func sendReportDomain(ctx context.Context, log mlog.Log, resolver dns.Resolver, db *bstore.DB, endUTC time.Time, isRcptDom bool, policyDomain, dayUTC string) (cleanup bool, rerr error) {
285 polDom, err := dns.ParseDomain(policyDomain)
287 return false, fmt.Errorf("parsing policy domain for sending tls reports: %v", err)
290 // Reports need to be DKIM-signed by the submitter domain. Lookup the DKIM
291 // configuration now. If we don't have any, there is no point sending reports.
292 // todo spec:
../rfc/8460:322 "reporting domain" is a bit ambiguous. submitter domain is used in other places. it may be helpful in practice to allow dmarc-relaxed-like matching of the signing domain, so an address postmaster at mail host can send the reports using dkim keys at a higher-up domain (e.g. the publicsuffix domain).
293 fromDom := mox.Conf.Static.HostnameDomain
294 var confDKIM config.DKIM
296 confDom, ok := mox.Conf.Domain(fromDom)
297 if confDom.Disabled {
298 return true, fmt.Errorf("domain is temporarily disabled")
299 } else if len(confDom.DKIM.Sign) > 0 {
300 confDKIM = confDom.DKIM
303 return true, fmt.Errorf("domain for mail host does not have dkim signing configured, report message cannot be dkim-signed")
306 // Remove least significant label.
308 _, nfd.ASCII, _ = strings.Cut(fromDom.ASCII, ".")
309 _, nfd.Unicode, _ = strings.Cut(fromDom.Unicode, ".")
312 var zerodom dns.Domain
313 if fromDom == zerodom {
314 return true, fmt.Errorf("no configured domain for mail host found, report message cannot be dkim-signed")
318 // We'll cleanup records by default.
320 // But if we encounter a temporary error, we cancel the cleanup of evaluations.
324 if !cleanup || tempError {
326 log.Debug("not cleaning up results after attempting to send tls report")
330 // Get TLSRPT record. If there are no reporting addresses, we're not going to send at all.
331 record, _, err := tlsrpt.Lookup(ctx, log.Logger, resolver, polDom)
333 // If there is no TLSRPT record, that's fine, we'll remove what we tracked.
334 if errors.Is(err, tlsrpt.ErrNoRecord) {
337 cleanup = errors.Is(err, tlsrpt.ErrDNS)
338 return cleanup, fmt.Errorf("looking up current tlsrpt record for reporting addresses: %v", err)
341 var recipients []message.NameAddress
342 var recipientStrs []string
344 for _, l := range record.RUAs {
345 for _, s := range l {
346 u, err := url.Parse(string(s))
348 log.Debugx("parsing rua uri in tlsrpt dns record, ignoring", err, slog.Any("rua", s))
352 if u.Scheme == "mailto" {
353 addr, err := smtp.ParseAddress(u.Opaque)
355 log.Debugx("parsing mailto uri in tlsrpt record rua value, ignoring", err, slog.Any("rua", s))
358 recipients = append(recipients, message.NameAddress{Address: addr})
359 recipientStrs = append(recipientStrs, string(s))
360 } else if u.Scheme == "https" {
361 // Although "report" is ambiguous and could mean both only the JSON data or an
362 // entire message (including DKIM-Signature) with the JSON data, it appears the
363 // intention of the RFC is that the HTTPS transport sends only the JSON data, given
364 // mention of the media type to use (for the HTTP POST). It is the type of the
365 // report, not of a message. TLS reports sent over email must have a DKIM
366 // signature, i.e. must be authenticated, for understandable reasons. No such
367 // requirement is specified for HTTPS, but no one is going to accept
368 // unauthenticated TLS reports over HTTPS. So there seems little point in sending
371 // todo spec: would be good to have clearer distinction between "report" (JSON) and "report message" (message with report attachment, that can be DKIM signed). propose sending report message over https that includes DKIM signature so authenticity can be verified and the report used.
../rfc/8460:310
372 log.Debug("https scheme in rua uri in tlsrpt record, ignoring since they will likey not be used to due lack of authentication", slog.Any("rua", s))
374 log.Debug("unknown scheme in rua uri in tlsrpt record, ignoring", slog.Any("rua", s))
379 if len(recipients) == 0 {
380 // No reports requested, perfectly fine, no work to do for us.
381 log.Debug("no tlsrpt reporting addresses configured")
385 q := bstore.QueryDB[tlsrptdb.TLSResult](ctx, db)
387 q.FilterNonzero(tlsrptdb.TLSResult{RecipientDomain: policyDomain, DayUTC: dayUTC})
389 q.FilterNonzero(tlsrptdb.TLSResult{PolicyDomain: policyDomain, DayUTC: dayUTC})
391 tlsResults, err := q.List()
393 return true, fmt.Errorf("get tls results from database: %v", err)
396 if len(tlsResults) == 0 {
397 // Should not happen. But no point in sending messages with empty reports.
398 return true, fmt.Errorf("no tls results found")
401 // Stop if we already sent a report for this destination.
402 for _, r := range tlsResults {
403 if r.PolicyDomain == r.RecipientDomain && (isRcptDom && r.SentToRecipientDomain || !isRcptDom && r.SentToPolicyDomain) {
408 beginUTC := endUTC.Add(-24 * time.Hour)
410 report := tlsrpt.Report{
411 OrganizationName: fromDom.ASCII,
412 DateRange: tlsrpt.TLSRPTDateRange{
416 ContactInfo: "postmaster@" + fromDom.ASCII,
417 // todo spec:
../rfc/8460:968 ../rfc/8460:1772 ../rfc/8460:691 subject header assumes a report-id in the form of a msg-id, but example and report-id json field explanation allows free-form report-id's (assuming we're talking about the same report-id here).
418 ReportID: endUTC.Add(-12*time.Hour).Format("20060102") + "." + polDom.ASCII + "@" + fromDom.ASCII,
421 rcptDomAddresses := map[string][]string{}
422 for _, tlsResult := range tlsResults {
423 rcptDomAddresses[tlsResult.RecipientDomain] = tlsResult.RecipientDomainReportingAddresses
426 // Merge all results into this report.
427 // If we are sending to a recipient domain, we include all relevant policy domains,
428 // so possibly multiple MX hosts (with DANE policies). That means we may be sending
429 // multiple "no-policy-found" results (1 for sts and 0 or more for mx hosts). An
430 // explicit no-sts or no-tlsa would make these less ambiguous, but the
431 // policy-domain's will make clear which is the MX and which is the recipient
432 // domain. Only for recipient domains with an MX target equal to the recipient host
433 // could it be confusing.
434 // If we are sending to MX targets (that aren't recipient domains), we mention the
435 // affected recipient domains as policy-domain while keeping the original policy
436 // domain (MX target) in the "mx-host" field. This behaviour isn't in the RFC, but
437 // seems useful to give MX operators insight into the recipient domains affected.
438 // We also won't include results for a recipient domain if its TLSRPT policy has
439 // the same reporting addresses as the MX target TLSRPT policy.
440 for i, tlsResult := range tlsResults {
442 if slices.Equal(rcptDomAddresses[tlsResult.RecipientDomain], recipientStrs) {
445 rcptDom, err := dns.ParseDomain(tlsResult.RecipientDomain)
447 return true, fmt.Errorf("parsing recipient domain %q from result: %v", tlsResult.RecipientDomain, err)
449 for j, r := range tlsResult.Results {
450 if tlsResult.IsHost {
451 tlsResults[i].Results[j].Policy.MXHost = []string{r.Policy.Domain}
453 tlsResults[i].Results[j].Policy.Domain = rcptDom.ASCII
457 report.Merge(tlsResult.Results...)
460 // We may not have any results left, i.e. when this is an MX target and we already
461 // sent all results in the report to the recipient domain with identical reporting
463 if len(report.Policies) == 0 {
467 if !mox.Conf.Static.OutgoingTLSReportsForAllSuccess {
469 // Check there is at least one failure. If not, we don't send a report.
470 for _, r := range report.Policies {
471 if r.Summary.TotalFailureSessionCount > 0 || len(r.FailureDetails) > 0 {
481 log.Info("sending tls report")
483 reportFile, err := store.CreateMessageTemp(log, "tlsreportout")
485 return false, fmt.Errorf("creating temporary file for outgoing tls report: %v", err)
487 defer store.CloseRemoveTempFile(log, reportFile, "generated tls report")
490 gzw := gzip.NewWriter(reportFile)
491 enc := json.NewEncoder(gzw)
492 enc.SetIndent("", "\t")
494 err = enc.Encode(report.Convert())
500 return false, fmt.Errorf("writing tls report as json with gzip: %v", err)
503 msgf, err := store.CreateMessageTemp(log, "tlsreportmsgout")
505 return false, fmt.Errorf("creating temporary message file with outgoing tls report: %v", err)
507 defer store.CloseRemoveTempFile(log, msgf, "message with generated tls report")
509 // We are sending reports from our host's postmaster address. In a
510 // typical setup the host is a subdomain of a configured domain with
511 // DKIM keys, so we can DKIM-sign our reports. SPF should pass anyway.
512 // todo future: when sending, use an SMTP MAIL FROM that we can relate back to recipient reporting address so we can stop trying to send reports in case of repeated delivery failure DSNs.
513 from := smtp.NewAddress("postmaster", fromDom)
516 subject := fmt.Sprintf("Report Domain: %s Submitter: %s Report-ID: <%s>", polDom.ASCII, fromDom, report.ReportID)
519 text := fmt.Sprintf(`Attached is a TLS report with a summary of connection successes and failures
520during attempts to securely deliver messages to your mail server, including
521details about errors encountered. You are receiving this message because your
522address is specified in the "rua" field of the TLSRPT record for your
529`, polDom, fromDom, report.ReportID, beginUTC.Format(time.DateTime), endUTC.Format(time.DateTime))
532 reportFilename := fmt.Sprintf("%s!%s!%d!%d.json.gz", fromDom.ASCII, polDom.ASCII, beginUTC.Unix(), endUTC.Add(-time.Second).Unix())
534 // Compose the message.
535 msgPrefix, has8bit, smtputf8, messageID, err := composeMessage(ctx, log, msgf, polDom, confDKIM, from, recipients, subject, text, reportFilename, reportFile)
537 return false, fmt.Errorf("composing message with outgoing tls report: %v", err)
539 msgInfo, err := msgf.Stat()
541 return false, fmt.Errorf("stat message with outgoing tls report: %v", err)
543 msgSize := int64(len(msgPrefix)) + msgInfo.Size()
545 // Already mark the report as sent. If it won't succeed below, it probably won't
546 // succeed on a later retry either. And if we would fail to mark a report as sent
547 // after sending it, we may send duplicates or even get in some kind of sending
549 err = db.Write(ctx, func(tx *bstore.Tx) error {
551 q := bstore.QueryTx[tlsrptdb.TLSResult](tx)
552 q.FilterNonzero(tlsrptdb.TLSResult{DayUTC: dayUTC, RecipientDomain: policyDomain})
553 _, err := q.UpdateNonzero(tlsrptdb.TLSResult{SentToRecipientDomain: true})
555 return fmt.Errorf("already marking tls results as sent for recipient domain: %v", err)
558 // Also set reporting addresses for the recipient domain results.
559 q = bstore.QueryTx[tlsrptdb.TLSResult](tx)
560 q.FilterNonzero(tlsrptdb.TLSResult{DayUTC: dayUTC, RecipientDomain: policyDomain})
561 _, err = q.UpdateNonzero(tlsrptdb.TLSResult{RecipientDomainReportingAddresses: recipientStrs})
563 return fmt.Errorf("storing recipient domain reporting addresses: %v", err)
566 q := bstore.QueryTx[tlsrptdb.TLSResult](tx)
567 q.FilterNonzero(tlsrptdb.TLSResult{DayUTC: dayUTC, PolicyDomain: policyDomain})
568 _, err := q.UpdateNonzero(tlsrptdb.TLSResult{SentToPolicyDomain: true})
570 return fmt.Errorf("already marking tls results as sent for policy domain: %v", err)
576 return false, fmt.Errorf("marking tls results as sent: %v", err)
580 for _, rcpt := range recipients {
581 // If recipient is on suppression list, we won't queue the reporting message.
582 q := bstore.QueryDB[tlsrptdb.SuppressAddress](ctx, db)
583 q.FilterNonzero(tlsrptdb.SuppressAddress{ReportingAddress: rcpt.Address.Path().String()})
584 q.FilterGreater("Until", time.Now())
585 exists, err := q.Exists()
587 return false, fmt.Errorf("querying suppress list: %v", err)
590 log.Info("suppressing outgoing tls report", slog.Any("reportingaddress", rcpt.Address))
594 qm := queue.MakeMsg(from.Path(), rcpt.Address.Path(), has8bit, smtputf8, msgSize, messageID, []byte(msgPrefix), nil, time.Now(), subject)
595 // Don't try as long as regular deliveries, and stop before we would send the
596 // delayed DSN. Though we also won't send that due to IsTLSReport.
599 qm.IsTLSReport = true
604 err = queueAdd(ctx, log, mox.Conf.Static.Postmaster.Account, msgf, qm)
607 log.Errorx("queueing message with tls report", err)
608 metricReportError.Inc()
612 log.Debug("tls report queued", slog.Any("recipient", rcpt))
617 // Regardless of whether we queued a report, we are not going to keep the
618 // evaluations around. Though this can be overridden if tempError is set.
624func composeMessage(ctx context.Context, log mlog.Log, mf *os.File, policyDomain dns.Domain, confDKIM config.DKIM, fromAddr smtp.Address, recipients []message.NameAddress, subject, text, filename string, reportFile *os.File) (msgPrefix string, has8bit, smtputf8 bool, messageID string, rerr error) {
625 // We only use smtputf8 if we have to, with a utf-8 localpart. For IDNA, we use ASCII domains.
626 smtputf8 = fromAddr.Localpart.IsInternational()
627 for _, r := range recipients {
629 smtputf8 = r.Address.Localpart.IsInternational()
633 xc := message.NewComposer(mf, 100*1024*1024, smtputf8)
639 if err, ok := x.(error); ok && errors.Is(err, message.ErrCompose) {
646 xc.HeaderAddrs("From", []message.NameAddress{{Address: fromAddr}})
647 xc.HeaderAddrs("To", recipients)
650 xc.Header("TLS-Report-Domain", policyDomain.ASCII)
651 xc.Header("TLS-Report-Submitter", fromAddr.Domain.ASCII)
653 xc.Header("TLS-Required", "No")
654 messageID = fmt.Sprintf("<%s>", mox.MessageIDGen(xc.SMTPUTF8))
655 xc.Header("Message-Id", messageID)
656 xc.Header("Date", time.Now().Format(message.RFC5322Z))
657 xc.Header("User-Agent", "mox/"+moxvar.Version)
658 xc.Header("MIME-Version", "1.0")
660 // Multipart message, with a text/plain and the report attached.
661 mp := multipart.NewWriter(xc)
663 xc.Header("Content-Type", fmt.Sprintf(`multipart/report; report-type="tlsrpt"; boundary="%s"`, mp.Boundary()))
666 // Textual part, just mentioning this is a TLS report.
667 textBody, ct, cte := xc.TextPart("plain", text)
668 textHdr := textproto.MIMEHeader{}
669 textHdr.Set("Content-Type", ct)
670 textHdr.Set("Content-Transfer-Encoding", cte)
671 textp, err := mp.CreatePart(textHdr)
672 xc.Checkf(err, "adding text part to message")
673 _, err = textp.Write(textBody)
674 xc.Checkf(err, "writing text part")
676 // TLS report as attachment.
677 ahdr := textproto.MIMEHeader{}
678 ct = mime.FormatMediaType("application/tlsrpt+gzip", map[string]string{"name": filename})
679 ahdr.Set("Content-Type", ct)
680 cd := mime.FormatMediaType("attachment", map[string]string{"filename": filename})
681 ahdr.Set("Content-Disposition", cd)
682 ahdr.Set("Content-Transfer-Encoding", "base64")
683 ap, err := mp.CreatePart(ahdr)
684 xc.Checkf(err, "adding tls report to message")
685 wc := moxio.Base64Writer(ap)
686 _, err = io.Copy(wc, &moxio.AtReader{R: reportFile})
687 xc.Checkf(err, "adding attachment")
689 xc.Checkf(err, "flushing attachment")
692 xc.Checkf(err, "closing multipart")
696 selectors := mox.DKIMSelectors(confDKIM)
697 for i, sel := range selectors {
699 sel.Headers = append(append([]string{}, sel.Headers...), "TLS-Report-Domain", "TLS-Report-Submitter")
703 dkimHeader, err := dkim.Sign(ctx, log.Logger, fromAddr.Localpart, fromAddr.Domain, selectors, smtputf8, mf)
704 xc.Checkf(err, "dkim-signing report message")
706 return dkimHeader, xc.Has8bit, xc.SMTPUTF8, messageID, nil