1// Package mtastsdb stores MTA-STS policies for later use.
3// An MTA-STS policy can specify how long it may be cached. By storing a
4// policy, it does not have to be fetched again during email delivery, which
5// makes it harder for attackers to intervene.
19 "github.com/prometheus/client_golang/prometheus"
20 "github.com/prometheus/client_golang/prometheus/promauto"
22 "github.com/mjl-/bstore"
24 "github.com/mjl-/mox/dns"
25 "github.com/mjl-/mox/mlog"
26 "github.com/mjl-/mox/mox-"
27 "github.com/mjl-/mox/moxvar"
28 "github.com/mjl-/mox/mtasts"
29 "github.com/mjl-/mox/tlsrpt"
// metricGet is a counter vector registered as "mox_mtastsdb_get_total". Get
// increments it with a single label value describing the result of the call.
33 metricGet = promauto.NewCounterVec(
34 prometheus.CounterOpts{
35 Name: "mox_mtastsdb_get_total",
36 Help: "Number of Get by result.",
// timeNow returns the current time. It is a variable rather than a direct call
// to time.Now so that tests can override it.
var timeNow = time.Now
44// PolicyRecord is a cached policy or absence of a policy.
//
// Upsert creates new records with an unkeyed composite literal, so the order of
// the fields must not change without updating that literal.
45type PolicyRecord struct {
46 Domain string // Domain name, with unicode characters.
47 Inserted time.Time `bstore:"default now"`
49 LastUpdate time.Time // Policies are refreshed on use and periodically.
50 LastUse time.Time `bstore:"index"` // PolicyRecords sorts on this field.
52 RecordID string // As retrieved from DNS.
53 mtasts.Policy // As retrieved from the well-known HTTPS url.
55 // Text that makes up the policy, as retrieved. We didn't store this in the past. If
56 // empty, policy can be reconstructed from Policy field. Needed by TLSRPT.
// ErrNotFound indicates there is no valid non-expired policy in the database.
var ErrNotFound = errors.New("mtastsdb: policy not found")

// ErrBackoff indicates an MTA-STS TXT record was fetched recently, but fetching
// the policy failed and should not yet be retried.
var ErrBackoff = errors.New("mtastsdb: policy fetch failed recently")
69var DBTypes = []any{PolicyRecord{}} // Types stored in DB.
70var DB *bstore.DB // Exported for backups.
72// Init opens the database and starts a goroutine that refreshes policies in
73// the database, and keeps doing so periodically.
//
// The database file is "mtasts.db", located through mox.DataDirPath. The result
// of opening it is assigned to the package-level DB.
74func Init(refresher bool) error {
75 log := mlog.New("mtastsdb", nil)
77 p := mox.DataDirPath("mtasts.db")
// Create the directory that will hold the database file. The returned error is
// not checked here.
// NOTE(review): presumably a failure shows up when opening the database below — confirm.
78 os.MkdirAll(filepath.Dir(p), 0770)
// Options for opening: a 5 second timeout, file permissions 0660, and a logger
// registered for this database path.
79 opts := bstore.Options{Timeout: 5 * time.Second, Perm: 0660, RegisterLogger: moxvar.RegisterLogger(p, log.Logger)}
81 DB, err = bstore.Open(mox.Shutdown, p, &opts, DBTypes...)
87 // todo: allow us to shut down cleanly?
94// Close closes the database.
// An error from closing DB is returned wrapped with the prefix "close db".
96 if err := DB.Close(); err != nil {
97 return fmt.Errorf("close db: %w", err)
103// lookup looks up a policy for the domain in the database.
105// Only non-expired records are returned.
107// Returns ErrNotFound if record is not present.
108// Returns ErrBackoff if a recent attempt to fetch a record failed.
109func lookup(ctx context.Context, log mlog.Log, domain dns.Domain) (*PolicyRecord, error) {
// This is a plain error, not one of the sentinels, so callers cannot mistake it
// for ErrNotFound or ErrBackoff.
111 return nil, fmt.Errorf("empty domain")
// Select the record for this domain name whose ValidEnd is greater than now.
114 q := bstore.QueryDB[PolicyRecord](ctx, DB)
115 q.FilterNonzero(PolicyRecord{Domain: domain.Name()})
116 q.FilterGreater("ValidEnd", now)
// NOTE(review): the sentinel is compared with ==; errors.Is would also match a
// wrapped ErrAbsent — confirm bstore always returns it unwrapped.
118 if err == bstore.ErrAbsent {
119 return nil, ErrNotFound
120 } else if err != nil {
// A failure to store the updated record is logged.
125 if err := DB.Update(ctx, &pr); err != nil {
126 log.Errorx("marking cached mta-sts policy as used in database", err)
129 return nil, ErrBackoff
134// Upsert adds the policy to the database, overwriting an existing policy for the domain.
135// Policy can be nil, indicating a failure to fetch the policy.
//
// The insert or update happens inside the function passed to DB.Write, using the
// transaction it provides.
136func Upsert(ctx context.Context, domain dns.Domain, recordID string, policy *mtasts.Policy, policyText string) error {
137 return DB.Write(ctx, func(tx *bstore.Tx) error {
138 pr := PolicyRecord{Domain: domain.Name()}
// An absent record is not a failure: it leads to the insert further down.
// NOTE(review): the sentinel is compared with ==; errors.Is would also match a
// wrapped ErrAbsent — confirm bstore always returns it unwrapped.
140 if err != nil && err != bstore.ErrAbsent {
// Placeholder policy with mode ModeNone and a max age of 5 minutes.
// NOTE(review): presumably only applied when policy is nil — confirm against the enclosing branch.
151 p.Mode = mtasts.ModeNone
152 p.MaxAgeSeconds = 5 * 60
// A nil policy marks the stored record as a back-off record.
154 backoff := policy == nil
// The record stays valid for the max age of the policy, counted from now.
155 validEnd := now.Add(time.Duration(p.MaxAgeSeconds) * time.Second)
157 if err == bstore.ErrAbsent {
// Unkeyed literal: the values must follow the field order of PolicyRecord.
158 pr = PolicyRecord{domain.Name(), now, validEnd, now, now, backoff, recordID, p, policyText}
159 return tx.Insert(&pr)
// A record already exists: overwrite its fields and store the update.
162 pr.ValidEnd = validEnd
166 pr.RecordID = recordID
168 pr.PolicyText = policyText
169 return tx.Update(&pr)
173// PolicyRecords returns all policies in the database, sorted descending by last
175func PolicyRecords(ctx context.Context) ([]PolicyRecord, error) {
176 return bstore.QueryDB[PolicyRecord](ctx, DB).SortDesc("LastUse", "Domain").List()
179// Get retrieves an MTA-STS policy for domain and whether it is fresh.
181// If an error is returned, it should be considered a transient error, e.g. a
182// temporary DNS lookup failure.
184// The returned policy can be nil also when there is no error. In this case, the
185// domain does not implement MTA-STS.
187// If a policy is present in the local database, it is refreshed if needed. If no
188// policy is present for the domain, an attempt is made to fetch the policy and
189// store it in the local database.
191// Some errors are logged but not otherwise returned, e.g. if a new policy is
192// supposedly published but could not be retrieved.
194// Get returns an "sts" or "no-policy-found" in reportResult in most cases (when
195// not a local/internal error). It may add an "sts" result without policy contents
196// ("policy-string") in case of errors while fetching the policy.
197func Get(ctx context.Context, elog *slog.Logger, resolver dns.Resolver, domain dns.Domain) (policy *mtasts.Policy, reportResult tlsrpt.Result, fresh bool, err error) {
198 log := mlog.New("mtastsdb", elog)
// Distinguish ErrBackoff, ErrNotFound and any other error for the label value
// passed to the metric below.
201 if err != nil && errors.Is(err, ErrBackoff) {
203 } else if err != nil && errors.Is(err, ErrNotFound) {
205 } else if err != nil {
208 metricGet.WithLabelValues(result).Inc()
209 log.Debugx("mtastsdb get result", err, slog.Any("domain", domain), slog.Bool("fresh", fresh))
// Consult the local database first.
212 cachedPolicy, err := lookup(ctx, log, domain)
213 if err != nil && errors.Is(err, ErrNotFound) {
214 // We don't have a policy for this domain, not even a record that we tried recently
215 // and should backoff. So attempt to fetch policy.
// The fetch through mtasts.Get is bounded to one minute.
216 nctx, cancel := context.WithTimeout(ctx, time.Minute)
218 record, p, ptext, err := mtasts.Get(nctx, log.Logger, resolver, domain)
221 case errors.Is(err, mtasts.ErrNoRecord) || errors.Is(err, mtasts.ErrMultipleRecords) || errors.Is(err, mtasts.ErrRecordSyntax) || errors.Is(err, mtasts.ErrNoPolicy) || errors.Is(err, mtasts.ErrPolicyFetch) || errors.Is(err, mtasts.ErrPolicySyntax):
223 log.Debugx("interpreting mtasts error to mean remote is not doing mta-sts", err)
// A missing DNS record is reported as "no policy found"; the other errors are
// reported as an STS result with failure details.
225 if errors.Is(err, mtasts.ErrNoRecord) {
226 reportResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, domain)
228 fd := policyFetchFailureDetails(err)
229 reportResult = tlsrpt.MakeResult(tlsrpt.STS, domain, fd)
233 // Interpret as temporary error, e.g. mtasts.ErrDNS, try again later.
235 // Temporary DNS error could be an operational issue on our side, but we can still
238 fd := tlsrpt.Details(tlsrpt.ResultSTSPolicyFetch, mtasts.TLSReportFailureReason(err))
239 reportResult = tlsrpt.MakeResult(tlsrpt.STS, domain, fd)
// NOTE(review): the message reads "lookup up"; probably meant "looking up", as in
// the cache lookup error further down.
241 return nil, reportResult, false, fmt.Errorf("lookup up mta-sts policy: %w", err)
243 } else if p.Mode == mtasts.ModeNone {
244 reportResult = tlsrpt.MakeResult(tlsrpt.NoPolicyFound, domain)
246 reportResult = tlsrpt.Result{Policy: tlsrptPolicy(p, ptext, domain)}
249 // Insert policy into database. If we could not fetch the policy itself, we back
251 if err == nil || errors.Is(err, mtasts.ErrNoPolicy) || errors.Is(err, mtasts.ErrPolicyFetch) || errors.Is(err, mtasts.ErrPolicySyntax) {
// A failure to store the policy is only logged.
256 if err := Upsert(ctx, domain, recordID, p, ptext); err != nil {
257 log.Errorx("inserting policy into cache, continuing", err)
// The outcome of a fetch done just now is returned as fresh.
261 return p, reportResult, true, nil
262 } else if err != nil && errors.Is(err, ErrBackoff) {
264 // We recently failed to fetch a policy, act as if MTA-STS is not implemented.
266 fd := tlsrpt.Details(tlsrpt.ResultSTSPolicyFetch, "back-off-after-recent-fetch-error")
267 reportResult = tlsrpt.MakeResult(tlsrpt.STS, domain, fd)
268 return nil, reportResult, false, nil
269 } else if err != nil {
270 // We don't add the result to the report, this is an internal error.
271 return nil, reportResult, false, fmt.Errorf("looking up mta-sts policy in cache: %w", err)
274 // Policy was found in database. Check in DNS it is still fresh.
275 policy = &cachedPolicy.Policy
// The DNS record lookup is bounded to 30 seconds.
276 nctx, cancel := context.WithTimeout(ctx, 30*time.Second)
278 record, _, err := mtasts.LookupRecord(nctx, log.Logger, resolver, domain)
280 if errors.Is(err, mtasts.ErrNoRecord) {
281 if policy.Mode != mtasts.ModeNone {
282 log.Errorx("no mtasts dns record while checking non-none policy for freshness, either domain owner removed mta-sts without phasing out policy with a none-policy for period of previous max-age, or this could be an attempt to downgrade to connection without mtasts, continuing with previous policy", err)
284 // else, policy will be removed by periodic refresher in the near future.
286 // Could be a temporary DNS or configuration error.
287 log.Errorx("checking for freshness of cached mta-sts dns txt record for domain, continuing with previously cached policy", err)
// Report the failed check. For a cached policy other than "none", the reason
// code is extended to show that the cached policy is used as fallback.
291 fd := tlsrpt.Details(tlsrpt.ResultSTSPolicyFetch, mtasts.TLSReportFailureReason(err))
292 if policy.Mode != mtasts.ModeNone {
293 fd.FailureReasonCode += "+fallback-to-cached-policy"
295 reportResult = tlsrpt.Result{
296 Policy: tlsrptPolicy(policy, cachedPolicy.PolicyText, domain),
297 FailureDetails: []tlsrpt.FailureDetails{fd},
// Continue with the cached policy, returned as not fresh.
299 return policy, reportResult, false, nil
300 } else if record.ID == cachedPolicy.RecordID && cachedPolicy.PolicyText != "" {
301 // In the past, we didn't store the raw policy lines in cachedPolicy.PolicyText. We only
302 // stop now if we do have policy lines in the cache.
303 reportResult = tlsrpt.Result{Policy: tlsrptPolicy(policy, cachedPolicy.PolicyText, domain)}
304 return policy, reportResult, true, nil
307 // New policy should be available, or we are fetching the policy again because we
308 // didn't store the raw policy lines in the past.
// Fetching the policy is bounded to 30 seconds as well.
309 nctx, cancel = context.WithTimeout(ctx, 30*time.Second)
311 p, ptext, err := mtasts.FetchPolicy(nctx, log.Logger, domain)
313 log.Errorx("fetching updated policy for domain, continuing with previously cached policy", err)
315 fd := policyFetchFailureDetails(err)
316 fd.FailureReasonCode += "+fallback-to-cached-policy"
317 reportResult = tlsrpt.Result{
318 Policy: tlsrptPolicy(policy, cachedPolicy.PolicyText, domain),
319 FailureDetails: []tlsrpt.FailureDetails{fd},
321 return policy, reportResult, false, nil
// A failure to store the refreshed policy is only logged; the newly fetched
// policy is still returned as fresh.
323 if err := Upsert(ctx, domain, record.ID, p, ptext); err != nil {
324 log.Errorx("inserting refreshed policy into cache, continuing with fresh policy", err)
326 reportResult = tlsrpt.Result{Policy: tlsrptPolicy(p, ptext, domain)}
327 return p, reportResult, true, nil
330func policyFetchFailureDetails(err error) tlsrpt.FailureDetails {
331 var verificationErr *tls.CertificateVerificationError
332 if errors.As(err, &verificationErr) {
333 resultType, reasonCode := tlsrpt.TLSFailureDetails(verificationErr)
335 reason := string(resultType)
336 if reasonCode != "" {
337 reason += "+" + reasonCode
339 return tlsrpt.Details(tlsrpt.ResultSTSWebPKIInvalid, reason)
340 } else if errors.Is(err, mtasts.ErrPolicySyntax) {
342 return tlsrpt.Details(tlsrpt.ResultSTSPolicyInvalid, mtasts.TLSReportFailureReason(err))
345 return tlsrpt.Details(tlsrpt.ResultSTSPolicyFetch, mtasts.TLSReportFailureReason(err))
348func tlsrptPolicy(p *mtasts.Policy, policyText string, domain dns.Domain) tlsrpt.ResultPolicy {
349 if policyText == "" {
350 // We didn't always store original policy lines. Reconstruct.
351 policyText = p.String()
353 lines := strings.Split(strings.TrimSuffix(policyText, "\n"), "\n")
354 for i, line := range lines {
355 lines[i] = strings.TrimSuffix(line, "\r")
358 rp := tlsrpt.ResultPolicy{
360 Domain: domain.ASCII,
363 rp.MXHost = make([]string, len(p.MX))
364 for i, mx := range p.MX {