9 "golang.org/x/text/unicode/norm"
11 "github.com/mjl-/mox/dns"
12 "github.com/mjl-/mox/smtp"
15// Pedantic enables stricter parsing.
20func (e parseErr) Error() string {
24var _ error = parseErr("")
28 o int // Offset into s.
29 tracked string // All data consumed, except when "drop" is true. To be set by caller when parsing the value for "b=".
31 smtputf8 bool // If set, allow characters > 0x7f.
// xerrorf builds an error message from format and args, as fmt.Sprintf does.
// The visible lines also append the unconsumed input (p.s from offset p.o) as
// "leftover" context. The condition guarding that step, and how the message is
// raised to the caller, are not shown in this excerpt.
34func (p *parser) xerrorf(format string, args ...any) {
35 msg := fmt.Sprintf(format, args...)
37 msg = fmt.Sprintf("%s (leftover %q)", msg, p.s[p.o:])
42func (p *parser) track(s string) {
// hasPrefix reports whether the unconsumed input, p.s from offset p.o onward,
// starts with s.
48func (p *parser) hasPrefix(s string) bool {
49 return strings.HasPrefix(p.s[p.o:], s)
52func (p *parser) xtaken(n int) string {
59func (p *parser) xtakefn(ignoreFWS bool, fn func(c rune, i int) bool) string {
61 for i, c := range p.s[p.o:] {
64 case ' ', '\t', '\r', '\n':
72 p.xtaken(len(p.s) - p.o)
// empty reports whether the offset p.o has reached or passed the end of p.s,
// i.e. there is no unconsumed input left.
76func (p *parser) empty() bool {
77 return p.o >= len(p.s)
80func (p *parser) xnonempty() {
82 p.xerrorf("expected at least 1 more char")
86func (p *parser) xtakefn1(ignoreFWS bool, fn func(c rune, i int) bool) string {
89 for i, c := range p.s[p.o:] {
92 case ' ', '\t', '\r', '\n':
96 p.xerrorf("expected at least 1 char")
103 return p.xtaken(len(p.s) - p.o)
// wsp consumes a run of space and tab characters at the current position by
// calling xtakefn with ignoreFWS set to false.
106func (p *parser) wsp() {
107 p.xtakefn(false, func(c rune, i int) bool {
108 return c == ' ' || c == '\t'
112func (p *parser) fws() {
114 if p.hasPrefix("\r\n ") || p.hasPrefix("\r\n\t") {
120// peekfws returns whether the remaining text starts with s, optionally prefixed with fws.
121func (p *parser) peekfws(s string) bool {
// xtake consumes the literal s at the current position and returns the result
// of p.xtaken(len(s)). If the unconsumed input does not start with s, it reports
// an "expected ..." error through xerrorf.
129func (p *parser) xtake(s string) string {
130 if !strings.HasPrefix(p.s[p.o:], s) {
131 p.xerrorf("expected %q", s)
133 return p.xtaken(len(s))
136func (p *parser) take(s string) bool {
137 if strings.HasPrefix(p.s[p.o:], s) {
// xtagName parses a tag name using xtakefn1. The predicate accepts an ASCII
// letter at any position, and an ASCII digit or underscore only at positions
// after the first (i > 0).
146func (p *parser) xtagName() string {
147 return p.xtakefn1(false, func(c rune, i int) bool {
148 return isalpha(c) || i > 0 && (isdigit(c) || c == '_')
152func (p *parser) xalgorithm() (string, string) {
154 xtagx := func(c rune, i int) bool {
155 return isalpha(c) || i > 0 && isdigit(c)
157 algk := p.xtakefn1(false, xtagx)
159 algv := p.xtakefn1(false, xtagx)
163// fws in value is ignored. empty/no base64 characters is valid.
166func (p *parser) xbase64() []byte {
168 p.xtakefn(false, func(c rune, i int) bool {
169 if isalphadigit(c) || c == '+' || c == '/' || c == '=' {
173 if c == ' ' || c == '\t' {
177 if strings.HasPrefix(rem, "\r\n ") || strings.HasPrefix(rem, "\r\n\t") {
180 if (strings.HasPrefix(rem, "\n ") || strings.HasPrefix(rem, "\n\t")) && p.o+i-1 > 0 && p.s[p.o+i-1] == '\r' {
185 buf, err := base64.StdEncoding.DecodeString(s)
187 p.xerrorf("decoding base64: %v", err)
192// parses canonicalization in original case.
193func (p *parser) xcanonical() string {
195 s := p.xhyphenatedWord()
197 return s + "/" + p.xhyphenatedWord()
// xdomainselector parses one or more labels separated by ".", each taken with
// xtakefn1. A label character is an ASCII letter or digit; after the first
// position a "-" is also accepted, and "_" too when isselector is true and
// Pedantic is false.
//
// Two return paths are visible: one returns the lower-cased text as the ASCII
// field of a dns.Domain without IDNA interpretation, the other runs
// dns.ParseDomain and reports a failure through xerrorf. The condition that
// selects between them is not shown in this excerpt.
//
// NOTE(review): the bounds check in the label predicate uses p.o+1, which does
// not depend on the character index i, whereas xhyphenatedWord checks p.o+i+1.
// Confirm whether p.o+i+1 was intended here.
202func (p *parser) xdomainselector(isselector bool) dns.Domain {
203 subdomain := func(c rune, i int) bool {
205 // dkim selectors with underscores happen in the wild, accept them when not in
207 return isalphadigit(c) || (i > 0 && (c == '-' || isselector && !Pedantic && c == '_') && p.o+1 < len(p.s))
209 s := p.xtakefn1(false, subdomain)
210 for p.hasPrefix(".") {
211 s += p.xtake(".") + p.xtakefn1(false, subdomain)
214 // Not to be interpreted as IDNA.
215 return dns.Domain{ASCII: strings.ToLower(s)}
217 d, err := dns.ParseDomain(s)
219 p.xerrorf("parsing domain %q: %s", s, err)
// xdomain parses a domain by calling xdomainselector with isselector false.
224func (p *parser) xdomain() dns.Domain {
225 return p.xdomainselector(false)
// xselector parses a selector by calling xdomainselector with isselector true.
228func (p *parser) xselector() dns.Domain {
229 return p.xdomainselector(true)
232func (p *parser) xhdrName(ignoreFWS bool) string {
235 // BNF for hdr-name (field-name) allows ";", but DKIM disallows unencoded semicolons.
// ../rfc/6376:643
237 return p.xtakefn1(ignoreFWS, func(c rune, i int) bool {
238 return c > ' ' && c < 0x7f && c != ':' && c != ';'
242func (p *parser) xsignedHeaderFields() []string {
244 l := []string{p.xhdrName(false)}
249 l = append(l, p.xhdrName(false))
254func (p *parser) xauid() Identity {
256 // Localpart is optional.
258 return Identity{Domain: p.xdomain()}
263 return Identity{&lp, dom}
266// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart
// xlocalpart parses the localpart of an address. When the input starts with a
// double quote it is read with xquotedString; the handling of other input is
// not shown in this excerpt. The result is NFC-normalized before being
// returned as an smtp.Localpart.
//
// The length check rejects more than 64 bytes when Pedantic is set, and more
// than 128 bytes otherwise.
//
// NOTE(review): the error message always says "longer than 64 octets", also
// when the 128-byte non-pedantic limit was the one exceeded. Confirm whether
// the message should reflect the limit that applied.
267func (p *parser) xlocalpart() smtp.Localpart {
271 if p.hasPrefix(`"`) {
272 s = p.xquotedString()
279 // In the wild, some services use large localparts for generated (bounce) addresses.
280 if Pedantic && len(s) > 64 || len(s) > 128 {
282 p.xerrorf("localpart longer than 64 octets")
284 return smtp.Localpart(norm.NFC.String(s))
287func (p *parser) xquotedString() string {
294 if c >= ' ' && c < 0x7f {
299 p.xerrorf("invalid localpart, bad escaped char %c", c)
308 if c >= ' ' && c < 0x7f && c != '\\' && c != '"' || (c > 0x7f && p.smtputf8) {
312 p.xerrorf("invalid localpart, invalid character %c", c)
316func (p *parser) xchar() rune {
317 // We are careful to track invalid utf-8 properly.
319 p.xerrorf("need another character")
323 for i, c := range p.s[p.o:] {
334 p.track(p.s[p.o : p.o+o])
// xatom parses an atom using xtakefn1. The predicate accepts ASCII letters and
// digits, and characters above 0x7f when p.smtputf8 is set. The punctuation
// characters listed in the case clause are handled by a separate branch whose
// result is not shown in this excerpt.
340func (p *parser) xatom() string {
341 return p.xtakefn1(false, func(c rune, i int) bool {
343 case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
346 return isalphadigit(c) || (c > 0x7f && p.smtputf8)
350func (p *parser) xbodyLength() int64 {
// xnumber parses a run of ASCII digits at the current position and converts it
// to an int64 with strconv.ParseInt (base 10). Errors are reported through
// xerrorf for three visible cases: no digits, too many digits, and a ParseInt
// failure. The "too many digits" limit presumably comes from maxdigits; the
// comparison itself is not shown in this excerpt, so confirm.
355func (p *parser) xnumber(maxdigits int) int64 {
357 for i, c := range p.s[p.o:] {
358 if c >= '0' && c <= '9' {
365 p.xerrorf("expected digits")
368 p.xerrorf("too many digits")
370 v, err := strconv.ParseInt(p.xtaken(o+1), 10, 64)
372 p.xerrorf("parsing digits: %s", err)
377func (p *parser) xqueryMethods() []string {
379 l := []string{p.xqtagmethod()}
383 l = append(l, p.xqtagmethod())
388func (p *parser) xqtagmethod() string {
390 s := p.xhyphenatedWord()
391 // ABNF production "x-sig-q-tag-args" should probably just have been
392 // "hyphenated-word". As qp-hdr-value, it will consume ":". A similar problem does
393 // not occur for "z" because it is also "|"-delimited. We work around the potential
394 // issue by parsing "dns/txt" explicitly.
396 if strings.EqualFold(s, "dns") && len(rem) >= len("/txt") && strings.EqualFold(rem[:len("/txt")], "/txt") {
398 } else if p.take("/") {
399 s += "/" + p.xqp(true, true, false)
// isalpha reports whether c is an ASCII letter, a-z or A-Z.
404func isalpha(c rune) bool {
405 return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'
// isdigit reports whether c is an ASCII digit, 0-9.
408func isdigit(c rune) bool {
409 return c >= '0' && c <= '9'
// isalphadigit reports whether c is an ASCII letter or an ASCII digit.
412func isalphadigit(c rune) bool {
413 return isalpha(c) || isdigit(c)
// xhyphenatedWord parses a word using xtakefn1. The predicate accepts an ASCII
// letter at any position and an ASCII digit after the first position. A "-" is
// accepted after the first position only when the byte following it exists and
// is an ASCII letter or digit, so a hyphen is never the last character taken.
417func (p *parser) xhyphenatedWord() string {
418 return p.xtakefn1(false, func(c rune, i int) bool {
419 return isalpha(c) || i > 0 && isdigit(c) || i > 0 && c == '-' && p.o+i+1 < len(p.s) && isalphadigit(rune(p.s[p.o+i+1]))
// xqphdrvalue parses a value by calling xqp with pipeEncoded true and
// colonEncoded false, passing ignoreFWS through unchanged.
424func (p *parser) xqphdrvalue(ignoreFWS bool) string {
425 return p.xqp(true, false, ignoreFWS)
// xqpSection parses a value by calling xqp with pipeEncoded, colonEncoded and
// ignoreFWS all false.
428func (p *parser) xqpSection() string {
429 return p.xqp(false, false, false)
432// dkim-quoted-printable (pipeEncoded true) or qp-section.
434// It is described in terms of (lots of) modifications to MIME quoted-printable,
435// but it may be simpler to just ignore that reference.
437// ignoreFWS is required for "z=", which can have FWS anywhere.
//
// The visible lines show two kinds of input being consumed: an encoded
// character made of two characters taken with xtakefn and combined as
// (hex(h[0]) << 4) | hex(h[1]), and runs of literal characters. Literal
// characters are printable ASCII (above ' ' and below 0x7f) other than ';' and
// '=', and also other than '|' when pipeEncoded is true. What happens when a
// "|" (pipeEncoded) or ":" (colonEncoded) is found at the current position is
// not shown in this excerpt.
438func (p *parser) xqp(pipeEncoded, colonEncoded, ignoreFWS bool) string {
// hex converts one character to its numeric value: a digit is handled by the
// first branch (its result is not visible here), anything else is computed as
// 10 plus its distance from 'A'.
441 hex := func(c byte) rune {
442 if c >= '0' && c <= '9' {
445 return rune(10 + c - 'A')
451 if pipeEncoded && p.hasPrefix("|") {
454 if colonEncoded && p.hasPrefix(":") {
458 h := p.xtakefn(ignoreFWS, func(c rune, i int) bool {
// NOTE(review): this accepts 'A'-'Z', but hexadecimal digits only run up to
// 'F'; for 'G'-'Z' the hex helper above yields values of 16 and higher.
// Confirm whether the upper bound should be 'F'.
459 return i < 2 && (c >= '0' && c <= '9' || c >= 'A' && c <= 'Z')
462 p.xerrorf("expected qp-hdr-value")
464 c := (hex(h[0]) << 4) | hex(h[1])
468 x := p.xtakefn(ignoreFWS, func(c rune, i int) bool {
469 return c > ' ' && c < 0x7f && c != ';' && c != '=' && !(pipeEncoded && c == '|')
479func (p *parser) xtimestamp() int64 {
484func (p *parser) xcopiedHeaderFields() []string {
486 l := []string{p.xztagcopy()}
487 for p.hasPrefix("|") {
490 l = append(l, p.xztagcopy())
495func (p *parser) xztagcopy() string {
496 // ABNF does not mention FWS (unlike for other fields), but FWS is allowed everywhere in the value...
498 f := p.xhdrName(true)
501 v := p.xqphdrvalue(true)