1// Package tlsrptsend sends TLS reports based on success/failure statistics and
2// details gathered while making SMTP STARTTLS connections for delivery. See RFC
6// tlsrptsend is a separate package instead of being in tlsrptdb because it imports
7// queue and queue imports tlsrptdb to store tls results, so that would cause a
10// Sending TLS reports and DMARC reports is very similar. See ../dmarcdb/eval.go:/similar and ../tlsrptsend/send.go:/similar.
12// todo spec:
../rfc/8460:441 ../rfc/8460:463 may lead the reader to believe they can find a DANE or MTA-STS policy at the same place, while in practice you'll get an MTA-STS policy at a recipient domain and a DANE policy at a mail host, and that's where the TLSRPT policy is defined. It would have helped with this implementation if the distinction was mentioned explicitly, also earlier in the document (I realized it late in the implementation process based on the terminology entry for the policy domain). Examples with a TLSRPT record at a mail host would have helped too.
13// todo spec:
../rfc/8460:1017 example report message misses the required DKIM signature.
34 "github.com/prometheus/client_golang/prometheus"
35 "github.com/prometheus/client_golang/prometheus/promauto"
37 "github.com/mjl-/bstore"
39 "github.com/mjl-/mox/config"
40 "github.com/mjl-/mox/dkim"
41 "github.com/mjl-/mox/dns"
42 "github.com/mjl-/mox/message"
43 "github.com/mjl-/mox/metrics"
44 "github.com/mjl-/mox/mlog"
45 "github.com/mjl-/mox/mox-"
46 "github.com/mjl-/mox/moxio"
47 "github.com/mjl-/mox/moxvar"
48 "github.com/mjl-/mox/queue"
49 "github.com/mjl-/mox/smtp"
50 "github.com/mjl-/mox/store"
51 "github.com/mjl-/mox/tlsrpt"
52 "github.com/mjl-/mox/tlsrptdb"
56 metricReport = promauto.NewCounter(
57 prometheus.CounterOpts{
58 Name: "mox_tlsrptsend_report_queued_total",
59 Help: "Total messages with TLS reports queued.",
62 metricReportError = promauto.NewCounter(
63 prometheus.CounterOpts{
64 Name: "mox_tlsrptsend_report_error_total",
65 Help: "Total errors while composing or queueing TLS reports.",
70var jitterRand = mox.NewPseudoRand()
72// time to sleep until sending reports at midnight t, replaced by tests.
73// Jitter so we don't cause load at exactly midnight, other processes may
74// already be doing that.
75var jitteredTimeUntil = func(t time.Time) time.Duration {
76 return time.Until(t.Add(time.Duration(240+jitterRand.Intn(120)) * time.Second))
79// Start launches a goroutine that wakes up just after 00:00 UTC to send TLSRPT
80// reports. Reports are sent spread out over a 4 hour period.
81func Start(resolver dns.Resolver) {
83 log := mlog.New("tlsrptsend", nil)
86 // In case of panic don't take the whole program down.
89 log.Error("recover from panic", slog.Any("panic", x))
91 metrics.PanicInc(metrics.Tlsrptdb)
95 timer := time.NewTimer(time.Hour) // Reset below.
100 db := tlsrptdb.ResultDB
102 log.Error("no tlsrpt results database for tls reports, not sending reports")
106 // We start sending for previous day, if there are any reports left.
107 endUTC := midnightUTC(time.Now())
110 dayUTC := endUTC.Add(-12 * time.Hour).Format("20060102")
112 // Remove evaluations older than 48 hours (2 reports with 24 hour interval)
113 // They should have been processed by now. We may have kept them
114 // during temporary errors, but persistent temporary errors shouldn't fill up our
115 // database and we don't want to send old reports either.
116 _, err := bstore.QueryDB[tlsrptdb.TLSResult](ctx, db).FilterLess("DayUTC", endUTC.Add((-48-12)*time.Hour).Format("20060102")).Delete()
117 log.Check(err, "removing stale tls results from database")
119 clog := log.WithCid(mox.Cid())
120 clog.Info("sending tls reports", slog.String("day", dayUTC))
121 if err := sendReports(ctx, clog, resolver, db, dayUTC, endUTC); err != nil {
122 clog.Errorx("sending tls reports", err)
123 metricReportError.Inc()
125 clog.Info("finished sending tls reports")
128 endUTC = endUTC.Add(24 * time.Hour)
129 timer.Reset(jitteredTimeUntil(endUTC))
133 log.Info("tls report sender shutting down")
141func midnightUTC(now time.Time) time.Time {
143 return time.Date(t.Year(), t.Month(), t.Day(), 0, 0, 0, 0, t.Location())
146// Sleep in between sending two reports.
148var sleepBetween = func(ctx context.Context, between time.Duration) (ok bool) {
149 t := time.NewTimer(between)
159// sendReports gathers all policy domains that have results that should receive a
160// TLS report and sends a report to each if their TLSRPT DNS record has reporting
162func sendReports(ctx context.Context, log mlog.Log, resolver dns.Resolver, db *bstore.DB, dayUTC string, endTimeUTC time.Time) error {
168 // Gather all policy domains we plan to send to.
169 rcptDoms := map[key]bool{} // Results where recipient domain is equal to policy domain, regardless of IsHost.
170 nonRcptDoms := map[key]bool{} // MX domains (without those that are also recipient domains).
172 q := bstore.QueryDB[tlsrptdb.TLSResult](ctx, db)
173 q.FilterLessEqual("DayUTC", dayUTC)
174 err := q.ForEach(func(e tlsrptdb.TLSResult) error {
176 if e.PolicyDomain != e.RecipientDomain {
179 k := key{e.PolicyDomain, e.DayUTC}
180 if e.SendReport && !doms[k] {
183 doms[k] = doms[k] || e.SendReport
187 return fmt.Errorf("looking for domains to send tls reports to: %v", err)
190 // Stretch sending reports over max 4 hours, but only if there are quite a few
192 between := 4 * time.Hour
194 between = between / time.Duration(nsend)
196 if between > 5*time.Minute {
197 between = 5 * time.Minute
200 var wg sync.WaitGroup
204 remove := map[key]struct{}{}
205 var removeMutex sync.Mutex
207 sendDomains := func(isRcptDom bool, doms map[key]bool) {
208 for k, send := range doms {
211 remove[k] = struct{}{}
217 ok := sleepBetween(ctx, between)
224 // In goroutine, so our timing stays independent of how fast we process.
228 // In case of panic don't take the whole program down.
231 log.Error("unhandled panic in tlsrptsend sendReports", slog.Any("panic", x))
233 metrics.PanicInc(metrics.Tlsrptdb)
238 rlog := log.WithCid(mox.Cid()).With(slog.String("policydomain", k.policyDomain),
239 slog.String("daytutc", k.dayUTC),
240 slog.Bool("isrcptdom", isRcptDom))
241 rlog.Info("looking to send tls report for domain")
242 cleanup, err := sendReportDomain(ctx, rlog, resolver, db, endTimeUTC, isRcptDom, k.policyDomain, k.dayUTC)
244 rlog.Errorx("sending tls report to domain", err)
245 metricReportError.Inc()
249 defer removeMutex.Unlock()
250 remove[k] = struct{}{}
256 // We send to recipient domains first. That will store the reporting addresses for
257 // the recipient domains, which are used when sending to nonRcptDoms to potentially
258 // skip sending a duplicate report.
259 sendDomains(true, rcptDoms)
261 sendDomains(false, nonRcptDoms)
264 // Remove all records that have been processed.
265 err = db.Write(ctx, func(tx *bstore.Tx) error {
266 for k := range remove {
267 q := bstore.QueryTx[tlsrptdb.TLSResult](tx)
268 q.FilterNonzero(tlsrptdb.TLSResult{PolicyDomain: k.policyDomain, DayUTC: k.dayUTC})
276 log.Check(err, "cleaning up tls results in database")
281// replaceable for testing.
282var queueAdd = queue.Add
284func sendReportDomain(ctx context.Context, log mlog.Log, resolver dns.Resolver, db *bstore.DB, endUTC time.Time, isRcptDom bool, policyDomain, dayUTC string) (cleanup bool, rerr error) {
285 polDom, err := dns.ParseDomain(policyDomain)
287 return false, fmt.Errorf("parsing policy domain for sending tls reports: %v", err)
290 // Reports need to be DKIM-signed by the submitter domain. Lookup the DKIM
291 // configuration now. If we don't have any, there is no point sending reports.
292 // todo spec:
../rfc/8460:322 "reporting domain" is a bit ambiguous. submitter domain is used in other places. it may be helpful in practice to allow dmarc-relaxed-like matching of the signing domain, so an address postmaster at mail host can send the reports using dkim keys at a higher-up domain (e.g. the publicsuffix domain).
293 fromDom := mox.Conf.Static.HostnameDomain
294 var confDKIM config.DKIM
296 confDom, ok := mox.Conf.Domain(fromDom)
297 if len(confDom.DKIM.Sign) > 0 {
298 confDKIM = confDom.DKIM
301 return true, fmt.Errorf("domain for mail host does not have dkim signing configured, report message cannot be dkim-signed")
304 // Remove least significant label.
306 _, nfd.ASCII, _ = strings.Cut(fromDom.ASCII, ".")
307 _, nfd.Unicode, _ = strings.Cut(fromDom.Unicode, ".")
310 var zerodom dns.Domain
311 if fromDom == zerodom {
312 return true, fmt.Errorf("no configured domain for mail host found, report message cannot be dkim-signed")
316 // We'll cleanup records by default.
318 // But if we encounter a temporary error, we cancel the cleanup of evaluations.
322 if !cleanup || tempError {
324 log.Debug("not cleaning up results after attempting to send tls report")
328 // Get TLSRPT record. If there are no reporting addresses, we're not going to send at all.
329 record, _, err := tlsrpt.Lookup(ctx, log.Logger, resolver, polDom)
331 // If there is no TLSRPT record, that's fine, we'll remove what we tracked.
332 if errors.Is(err, tlsrpt.ErrNoRecord) {
335 cleanup = errors.Is(err, tlsrpt.ErrDNS)
336 return cleanup, fmt.Errorf("looking up current tlsrpt record for reporting addresses: %v", err)
339 var recipients []message.NameAddress
340 var recipientStrs []string
342 for _, l := range record.RUAs {
343 for _, s := range l {
344 u, err := url.Parse(string(s))
346 log.Debugx("parsing rua uri in tlsrpt dns record, ignoring", err, slog.Any("rua", s))
350 if u.Scheme == "mailto" {
351 addr, err := smtp.ParseAddress(u.Opaque)
353 log.Debugx("parsing mailto uri in tlsrpt record rua value, ignoring", err, slog.Any("rua", s))
356 recipients = append(recipients, message.NameAddress{Address: addr})
357 recipientStrs = append(recipientStrs, string(s))
358 } else if u.Scheme == "https" {
359 // Although "report" is ambiguous and could mean both only the JSON data or an
360 // entire message (including DKIM-Signature) with the JSON data, it appears the
361 // intention of the RFC is that the HTTPS transport sends only the JSON data, given
362 // mention of the media type to use (for the HTTP POST). It is the type of the
363 // report, not of a message. TLS reports sent over email must have a DKIM
364 // signature, i.e. must be authenticated, for understandable reasons. No such
365 // requirement is specified for HTTPS, but no one is going to accept
366 // unauthenticated TLS reports over HTTPS. So there seems little point in sending
369 // todo spec: would be good to have clearer distinction between "report" (JSON) and "report message" (message with report attachment, that can be DKIM signed). propose sending report message over https that includes DKIM signature so authenticity can be verified and the report used.
../rfc/8460:310
370 log.Debug("https scheme in rua uri in tlsrpt record, ignoring since they will likey not be used to due lack of authentication", slog.Any("rua", s))
372 log.Debug("unknown scheme in rua uri in tlsrpt record, ignoring", slog.Any("rua", s))
377 if len(recipients) == 0 {
378 // No reports requested, perfectly fine, no work to do for us.
379 log.Debug("no tlsrpt reporting addresses configured")
383 q := bstore.QueryDB[tlsrptdb.TLSResult](ctx, db)
385 q.FilterNonzero(tlsrptdb.TLSResult{RecipientDomain: policyDomain, DayUTC: dayUTC})
387 q.FilterNonzero(tlsrptdb.TLSResult{PolicyDomain: policyDomain, DayUTC: dayUTC})
389 tlsResults, err := q.List()
391 return true, fmt.Errorf("get tls results from database: %v", err)
394 if len(tlsResults) == 0 {
395 // Should not happen. But no point in sending messages with empty reports.
396 return true, fmt.Errorf("no tls results found")
399 // Stop if we already sent a report for this destination.
400 for _, r := range tlsResults {
401 if r.PolicyDomain == r.RecipientDomain && (isRcptDom && r.SentToRecipientDomain || !isRcptDom && r.SentToPolicyDomain) {
406 beginUTC := endUTC.Add(-24 * time.Hour)
408 report := tlsrpt.Report{
409 OrganizationName: fromDom.ASCII,
410 DateRange: tlsrpt.TLSRPTDateRange{
414 ContactInfo: "postmaster@" + fromDom.ASCII,
415 // todo spec:
../rfc/8460:968 ../rfc/8460:1772 ../rfc/8460:691 subject header assumes a report-id in the form of a msg-id, but example and report-id json field explanation allows free-form report-id's (assuming we're talking about the same report-id here).
416 ReportID: endUTC.Add(-12*time.Hour).Format("20060102") + "." + polDom.ASCII + "@" + fromDom.ASCII,
419 rcptDomAddresses := map[string][]string{}
420 for _, tlsResult := range tlsResults {
421 rcptDomAddresses[tlsResult.RecipientDomain] = tlsResult.RecipientDomainReportingAddresses
424 // Merge all results into this report.
425 // If we are sending to a recipient domain, we include all relevant policy domains,
426 // so possibly multiple MX hosts (with DANE policies). That means we may be sending
427 // multiple "no-policy-found" results (1 for sts and 0 or more for mx hosts). An
428 // explicit no-sts or no-tlsa would make these less ambiguous, but the
429 // policy-domain's will make clear which is the MX and which is the recipient
430 // domain. Only for recipient domains with an MX target equal to the recipient host
431 // could it be confusing.
432 // If we are sending to MX targets (that aren't recipient domains), we mention the
433 // affected recipient domains as policy-domain while keeping the original policy
434 // domain (MX target) in the "mx-host" field. This behaviour isn't in the RFC, but
435 // seems useful to give MX operators insight into the recipient domains affected.
436 // We also won't include results for a recipient domain if its TLSRPT policy has
437 // the same reporting addresses as the MX target TLSRPT policy.
438 for i, tlsResult := range tlsResults {
440 if slices.Equal(rcptDomAddresses[tlsResult.RecipientDomain], recipientStrs) {
443 rcptDom, err := dns.ParseDomain(tlsResult.RecipientDomain)
445 return true, fmt.Errorf("parsing recipient domain %q from result: %v", tlsResult.RecipientDomain, err)
447 for j, r := range tlsResult.Results {
448 if tlsResult.IsHost {
449 tlsResults[i].Results[j].Policy.MXHost = []string{r.Policy.Domain}
451 tlsResults[i].Results[j].Policy.Domain = rcptDom.ASCII
455 report.Merge(tlsResult.Results...)
458 // We may not have any results left, i.e. when this is an MX target and we already
459 // sent all results in the report to the recipient domain with identical reporting
461 if len(report.Policies) == 0 {
465 if !mox.Conf.Static.OutgoingTLSReportsForAllSuccess {
467 // Check there is at least one failure. If not, we don't send a report.
468 for _, r := range report.Policies {
469 if r.Summary.TotalFailureSessionCount > 0 || len(r.FailureDetails) > 0 {
479 log.Info("sending tls report")
481 reportFile, err := store.CreateMessageTemp(log, "tlsreportout")
483 return false, fmt.Errorf("creating temporary file for outgoing tls report: %v", err)
485 defer store.CloseRemoveTempFile(log, reportFile, "generated tls report")
488 gzw := gzip.NewWriter(reportFile)
489 enc := json.NewEncoder(gzw)
490 enc.SetIndent("", "\t")
492 err = enc.Encode(report.Convert())
498 return false, fmt.Errorf("writing tls report as json with gzip: %v", err)
501 msgf, err := store.CreateMessageTemp(log, "tlsreportmsgout")
503 return false, fmt.Errorf("creating temporary message file with outgoing tls report: %v", err)
505 defer store.CloseRemoveTempFile(log, msgf, "message with generated tls report")
507 // We are sending reports from our host's postmaster address. In a
508 // typical setup the host is a subdomain of a configured domain with
509 // DKIM keys, so we can DKIM-sign our reports. SPF should pass anyway.
510 // todo future: when sending, use an SMTP MAIL FROM that we can relate back to recipient reporting address so we can stop trying to send reports in case of repeated delivery failure DSNs.
511 from := smtp.Address{Localpart: "postmaster", Domain: fromDom}
514 subject := fmt.Sprintf("Report Domain: %s Submitter: %s Report-ID: <%s>", polDom.ASCII, fromDom, report.ReportID)
517 text := fmt.Sprintf(`Attached is a TLS report with a summary of connection successes and failures
518during attempts to securely deliver messages to your mail server, including
519details about errors encountered. You are receiving this message because your
520address is specified in the "rua" field of the TLSRPT record for your
527`, polDom, fromDom, report.ReportID, beginUTC.Format(time.DateTime), endUTC.Format(time.DateTime))
530 reportFilename := fmt.Sprintf("%s!%s!%d!%d.json.gz", fromDom.ASCII, polDom.ASCII, beginUTC.Unix(), endUTC.Add(-time.Second).Unix())
532 // Compose the message.
533 msgPrefix, has8bit, smtputf8, messageID, err := composeMessage(ctx, log, msgf, polDom, confDKIM, from, recipients, subject, text, reportFilename, reportFile)
535 return false, fmt.Errorf("composing message with outgoing tls report: %v", err)
537 msgInfo, err := msgf.Stat()
539 return false, fmt.Errorf("stat message with outgoing tls report: %v", err)
541 msgSize := int64(len(msgPrefix)) + msgInfo.Size()
543 // Already mark the report as sent. If it won't succeed below, it probably won't
544 // succeed on a later retry either. And if we would fail to mark a report as sent
545 // after sending it, we may send duplicates or even get in some kind of sending
547 err = db.Write(ctx, func(tx *bstore.Tx) error {
549 q := bstore.QueryTx[tlsrptdb.TLSResult](tx)
550 q.FilterNonzero(tlsrptdb.TLSResult{DayUTC: dayUTC, RecipientDomain: policyDomain})
551 _, err := q.UpdateNonzero(tlsrptdb.TLSResult{SentToRecipientDomain: true})
553 return fmt.Errorf("already marking tls results as sent for recipient domain: %v", err)
556 // Also set reporting addresses for the recipient domain results.
557 q = bstore.QueryTx[tlsrptdb.TLSResult](tx)
558 q.FilterNonzero(tlsrptdb.TLSResult{DayUTC: dayUTC, RecipientDomain: policyDomain})
559 _, err = q.UpdateNonzero(tlsrptdb.TLSResult{RecipientDomainReportingAddresses: recipientStrs})
561 return fmt.Errorf("storing recipient domain reporting addresses: %v", err)
564 q := bstore.QueryTx[tlsrptdb.TLSResult](tx)
565 q.FilterNonzero(tlsrptdb.TLSResult{DayUTC: dayUTC, PolicyDomain: policyDomain})
566 _, err := q.UpdateNonzero(tlsrptdb.TLSResult{SentToPolicyDomain: true})
568 return fmt.Errorf("already marking tls results as sent for policy domain: %v", err)
574 return false, fmt.Errorf("marking tls results as sent: %v", err)
578 for _, rcpt := range recipients {
579 // If recipient is on suppression list, we won't queue the reporting message.
580 q := bstore.QueryDB[tlsrptdb.SuppressAddress](ctx, db)
581 q.FilterNonzero(tlsrptdb.SuppressAddress{ReportingAddress: rcpt.Address.Path().String()})
582 q.FilterGreater("Until", time.Now())
583 exists, err := q.Exists()
585 return false, fmt.Errorf("querying suppress list: %v", err)
588 log.Info("suppressing outgoing tls report", slog.Any("reportingaddress", rcpt.Address))
592 qm := queue.MakeMsg(from.Path(), rcpt.Address.Path(), has8bit, smtputf8, msgSize, messageID, []byte(msgPrefix), nil, time.Now(), subject)
593 // Don't try as long as regular deliveries, and stop before we would send the
594 // delayed DSN. Though we also won't send that due to IsTLSReport.
597 qm.IsTLSReport = true
602 err = queueAdd(ctx, log, mox.Conf.Static.Postmaster.Account, msgf, qm)
605 log.Errorx("queueing message with tls report", err)
606 metricReportError.Inc()
610 log.Debug("tls report queued", slog.Any("recipient", rcpt))
615 // Regardless of whether we queued a report, we are not going to keep the
616 // evaluations around. Though this can be overridden if tempError is set.
622func composeMessage(ctx context.Context, log mlog.Log, mf *os.File, policyDomain dns.Domain, confDKIM config.DKIM, fromAddr smtp.Address, recipients []message.NameAddress, subject, text, filename string, reportFile *os.File) (msgPrefix string, has8bit, smtputf8 bool, messageID string, rerr error) {
623 // We only use smtputf8 if we have to, with a utf-8 localpart. For IDNA, we use ASCII domains.
624 smtputf8 = fromAddr.Localpart.IsInternational()
625 for _, r := range recipients {
627 smtputf8 = r.Address.Localpart.IsInternational()
631 xc := message.NewComposer(mf, 100*1024*1024, smtputf8)
637 if err, ok := x.(error); ok && errors.Is(err, message.ErrCompose) {
644 xc.HeaderAddrs("From", []message.NameAddress{{Address: fromAddr}})
645 xc.HeaderAddrs("To", recipients)
648 xc.Header("TLS-Report-Domain", policyDomain.ASCII)
649 xc.Header("TLS-Report-Submitter", fromAddr.Domain.ASCII)
651 xc.Header("TLS-Required", "No")
652 messageID = fmt.Sprintf("<%s>", mox.MessageIDGen(xc.SMTPUTF8))
653 xc.Header("Message-Id", messageID)
654 xc.Header("Date", time.Now().Format(message.RFC5322Z))
655 xc.Header("User-Agent", "mox/"+moxvar.Version)
656 xc.Header("MIME-Version", "1.0")
658 // Multipart message, with a text/plain and the report attached.
659 mp := multipart.NewWriter(xc)
661 xc.Header("Content-Type", fmt.Sprintf(`multipart/report; report-type="tlsrpt"; boundary="%s"`, mp.Boundary()))
664 // Textual part, just mentioning this is a TLS report.
665 textBody, ct, cte := xc.TextPart("plain", text)
666 textHdr := textproto.MIMEHeader{}
667 textHdr.Set("Content-Type", ct)
668 textHdr.Set("Content-Transfer-Encoding", cte)
669 textp, err := mp.CreatePart(textHdr)
670 xc.Checkf(err, "adding text part to message")
671 _, err = textp.Write(textBody)
672 xc.Checkf(err, "writing text part")
674 // TLS report as attachment.
675 ahdr := textproto.MIMEHeader{}
676 ct = mime.FormatMediaType("application/tlsrpt+gzip", map[string]string{"name": filename})
677 ahdr.Set("Content-Type", ct)
678 cd := mime.FormatMediaType("attachment", map[string]string{"filename": filename})
679 ahdr.Set("Content-Disposition", cd)
680 ahdr.Set("Content-Transfer-Encoding", "base64")
681 ap, err := mp.CreatePart(ahdr)
682 xc.Checkf(err, "adding tls report to message")
683 wc := moxio.Base64Writer(ap)
684 _, err = io.Copy(wc, &moxio.AtReader{R: reportFile})
685 xc.Checkf(err, "adding attachment")
687 xc.Checkf(err, "flushing attachment")
690 xc.Checkf(err, "closing multipart")
694 selectors := mox.DKIMSelectors(confDKIM)
695 for i, sel := range selectors {
697 sel.Headers = append(append([]string{}, sel.Headers...), "TLS-Report-Domain", "TLS-Report-Submitter")
701 dkimHeader, err := dkim.Sign(ctx, log.Logger, fromAddr.Localpart, fromAddr.Domain, selectors, smtputf8, mf)
702 xc.Checkf(err, "dkim-signing report message")
704 return dkimHeader, xc.Has8bit, xc.SMTPUTF8, messageID, nil