1package message
2
3import (
4 "errors"
5 "fmt"
6 "strings"
7
8 "github.com/mjl-/mox/smtp"
9)
10
// errBadMessageID is the sentinel wrapped by every error that
// MessageIDCanonical returns for a malformed Message-ID.
var errBadMessageID = errors.New("not a message-id")
12
13// MessageIDCanonical parses the Message-ID, returning a canonical value that is
14// lower-cased, without <>, and no unneeded quoting. For matching in threading,
15// with References/In-Reply-To. If the message-id is invalid (e.g. no <>), an error
16// is returned. If the message-id could not be parsed as address (localpart "@"
17// domain), the raw value and the bool return parameter true is returned. It is
18// quite common that message-id's don't adhere to the localpart @ domain
19// syntax.
20func MessageIDCanonical(s string) (string, bool, error) {
21 // ../rfc/5322:1383
22
23 s = strings.TrimSpace(s)
24 if !strings.HasPrefix(s, "<") {
25 return "", false, fmt.Errorf("%w: missing <", errBadMessageID)
26 }
27 s = s[1:]
28 // Seen in practice: Message-ID: <valid@valid.example> (added by postmaster@some.example)
29 // Doesn't seem valid, but we allow it.
30 s, rem, have := strings.Cut(s, ">")
31 if !have || (rem != "" && (Pedantic || !strings.HasPrefix(rem, " "))) {
32 return "", false, fmt.Errorf("%w: missing >", errBadMessageID)
33 }
34 // We canonicalize the Message-ID: lower-case, no unneeded quoting.
35 s = strings.ToLower(s)
36 if s == "" {
37 return "", false, fmt.Errorf("%w: empty message-id", errBadMessageID)
38 }
39 addr, err := smtp.ParseAddress(s)
40 if err != nil {
41 // Common reasons for not being an address:
42 // 1. underscore in hostname.
43 // 2. ip literal instead of domain.
44 // 3. two @'s, perhaps intended as time-separator
45 // 4. no @'s, so no domain/host
46 return s, true, nil
47 }
48 // We preserve the unicode-ness of domain.
49 t := strings.Split(s, "@")
50 s = addr.Localpart.String() + "@" + t[len(t)-1]
51 return s, false, nil
52}
53