9 "github.com/mjl-/mox/dns"
10 "github.com/mjl-/mox/smtp"
13// Pedantic enables stricter parsing.
18func (e parseErr) Error() string {
// Compile-time assertion that parseErr satisfies the error interface.
22var _ error = parseErr("")
26 o int // Offset into s.
27 tracked string // All data consumed, except when "drop" is true. To be set by caller when parsing the value for "b=".
29 smtputf8 bool // If set, allow characters > 0x7f.
// xerrorf builds an error message from format and args with fmt.Sprintf. The
// message can be extended with the quoted, still unparsed input p.s[p.o:].
// NOTE(review): the condition guarding the "leftover" suffix and the way the
// message is raised are not visible here — presumably a panic with parseErr;
// confirm.
32func (p *parser) xerrorf(format string, args ...any) {
33 msg := fmt.Sprintf(format, args...)
35 msg = fmt.Sprintf("%s (leftover %q)", msg, p.s[p.o:])
40func (p *parser) track(s string) {
// hasPrefix reports whether the unconsumed input, p.s[p.o:], starts with s. The
// parser offset is not changed.
46func (p *parser) hasPrefix(s string) bool {
47 return strings.HasPrefix(p.s[p.o:], s)
50func (p *parser) xtaken(n int) string {
// xtakefn walks the unconsumed input rune by rune; i is the byte offset of rune c
// relative to the current parser offset. When the loop runs to the end of the
// input, everything that remains is passed to xtaken.
// NOTE(review): the loop body is only partly visible. Presumably fn decides
// whether a rune is taken, and the space/tab/CR/LF case skips whitespace when
// ignoreFWS is set — confirm.
57func (p *parser) xtakefn(ignoreFWS bool, fn func(c rune, i int) bool) string {
59 for i, c := range p.s[p.o:] {
62 case ' ', '\t', '\r', '\n':
70 p.xtaken(len(p.s) - p.o)
// empty reports whether all input has been consumed, i.e. whether the offset p.o
// is at or past the end of p.s.
74func (p *parser) empty() bool {
75 return p.o >= len(p.s)
// xnonempty raises the error "expected at least 1 more char" through xerrorf.
// NOTE(review): the guarding condition is not visible here; presumably the error
// is raised only when no input remains — confirm.
78func (p *parser) xnonempty() {
80 p.xerrorf("expected at least 1 more char")
// xtakefn1 is the variant of xtakefn that can fail with "expected at least 1
// char". It walks the unconsumed input rune by rune (i is the byte offset of c
// relative to the parser offset) and, when the loop runs to the end of the input,
// returns all remaining input via xtaken.
// NOTE(review): the loop body is only partly visible. Presumably fn decides
// whether a rune is taken, whitespace is skipped when ignoreFWS is set, and the
// error is raised when the first rune is rejected — confirm.
84func (p *parser) xtakefn1(ignoreFWS bool, fn func(c rune, i int) bool) string {
87 for i, c := range p.s[p.o:] {
90 case ' ', '\t', '\r', '\n':
94 p.xerrorf("expected at least 1 char")
101 return p.xtaken(len(p.s) - p.o)
// wsp calls xtakefn (which does not require any input to match) with a predicate
// that accepts only space and tab; the taken text is discarded.
104func (p *parser) wsp() {
105 p.xtakefn(false, func(c rune, i int) bool {
106 return c == ' ' || c == '\t'
// fws handles folding whitespace: it tests whether the unconsumed input starts
// with CRLF followed by a space or a tab.
// NOTE(review): what is consumed before and after that test is not visible
// here — confirm.
110func (p *parser) fws() {
112 if p.hasPrefix("\r\n ") || p.hasPrefix("\r\n\t") {
118// peekfws reports whether the remaining text starts with s, optionally preceded by FWS.
119func (p *parser) peekfws(s string) bool {
// xtake requires the unconsumed input to start with the literal s. If it does
// not, an "expected ..." error naming s is raised through xerrorf; otherwise the
// result of xtaken(len(s)) is returned.
127func (p *parser) xtake(s string) string {
128 if !strings.HasPrefix(p.s[p.o:], s) {
129 p.xerrorf("expected %q", s)
131 return p.xtaken(len(s))
// take tests whether the unconsumed input starts with s and returns a bool.
// NOTE(review): presumably s is consumed and true returned on a match, and false
// returned otherwise without raising an error — the rest of the body is not
// visible here; confirm.
134func (p *parser) take(s string) bool {
135 if strings.HasPrefix(p.s[p.o:], s) {
// xtagName takes a tag name with xtakefn1: the first character must be an ASCII
// letter; subsequent characters may be ASCII letters, digits or '_'.
144func (p *parser) xtagName() string {
145 return p.xtakefn1(false, func(c rune, i int) bool {
146 return isalpha(c) || i > 0 && (isdigit(c) || c == '_')
// xalgorithm parses two tokens with xtakefn1, algk and algv. Each token starts
// with an ASCII letter and continues with ASCII letters or digits.
// NOTE(review): the separator between the two tokens and the exact values
// returned are not visible here — presumably "-" as in "rsa-sha256"; confirm.
150func (p *parser) xalgorithm() (string, string) {
152 xtagx := func(c rune, i int) bool {
153 return isalpha(c) || i > 0 && isdigit(c)
155 algk := p.xtakefn1(false, xtagx)
157 algv := p.xtakefn1(false, xtagx)
161// xbase64 parses a base64 value and decodes it with base64.StdEncoding. FWS inside the value is ignored. An empty value (no base64 characters) is valid.
// The scanning predicate recognises base64 characters (ASCII letters, digits,
// '+', '/', '='), space and tab, and CRLF followed by space or tab. A decoding
// failure is reported through xerrorf.
// NOTE(review): how whitespace is removed before decoding is not visible
// here — confirm.
164func (p *parser) xbase64() []byte {
166 p.xtakefn(false, func(c rune, i int) bool {
167 if isalphadigit(c) || c == '+' || c == '/' || c == '=' {
171 if c == ' ' || c == '\t' {
175 if strings.HasPrefix(rem, "\r\n ") || strings.HasPrefix(rem, "\r\n\t") {
 // Accept the LF of a CRLF whose CR was already seen at the previous position.
178 if (strings.HasPrefix(rem, "\n ") || strings.HasPrefix(rem, "\n\t")) && p.o+i-1 > 0 && p.s[p.o+i-1] == '\r' {
183 buf, err := base64.StdEncoding.DecodeString(s)
185 p.xerrorf("decoding base64: %v", err)
190// xcanonical parses a canonicalization value and keeps its original case.
// It reads one hyphenated word and can return it joined by "/" with a second
// hyphenated word.
// NOTE(review): the condition selecting the two-word form is not visible here;
// presumably it applies when a "/" follows the first word — confirm.
191func (p *parser) xcanonical() string {
193 s := p.xhyphenatedWord()
195 return s + "/" + p.xhyphenatedWord()
// xdomainselector parses a dot-separated name: a label, followed by any number of
// "." plus label. A label character is an ASCII letter or digit, or, after the
// first character, '-'. When isselector is set and Pedantic is false, '_' is
// accepted after the first character as well.
//
// Two return paths are visible: one wraps the lowercased text in a dns.Domain
// without IDNA interpretation, the other parses the text with dns.ParseDomain and
// reports a failure through xerrorf.
// NOTE(review): the condition choosing between the two paths is not visible
// here; presumably isselector — confirm.
200func (p *parser) xdomainselector(isselector bool) dns.Domain {
201 subdomain := func(c rune, i int) bool {
203 // dkim selectors with underscores happen in the wild, accept them when not in pedantic mode.
 // NOTE(review): the bound check below uses p.o+1, not p.o+i+1 (compare
 // xhyphenatedWord), so it does not depend on the position of the '-' or '_' —
 // confirm this is intended.
205 return isalphadigit(c) || (i > 0 && (c == '-' || isselector && !Pedantic && c == '_') && p.o+1 < len(p.s))
207 s := p.xtakefn1(false, subdomain)
208 for p.hasPrefix(".") {
209 s += p.xtake(".") + p.xtakefn1(false, subdomain)
212 // Not to be interpreted as IDNA.
213 return dns.Domain{ASCII: strings.ToLower(s)}
215 d, err := dns.ParseDomain(s)
217 p.xerrorf("parsing domain %q: %s", s, err)
// xdomain parses a domain by calling xdomainselector with isselector set to false.
222func (p *parser) xdomain() dns.Domain {
223 return p.xdomainselector(false)
// xselector parses a selector by calling xdomainselector with isselector set to
// true.
226func (p *parser) xselector() dns.Domain {
227 return p.xdomainselector(true)
// xhdrName takes a header field name with xtakefn1: characters above space and
// below 0x7f, excluding ':' and ';'. ignoreFWS is passed through to xtakefn1.
230func (p *parser) xhdrName(ignoreFWS bool) string {
233 // BNF for hdr-name (field-name) allows ";", but DKIM disallows unencoded semicolons. ../rfc/6376:643
235 return p.xtakefn1(ignoreFWS, func(c rune, i int) bool {
236 return c > ' ' && c < 0x7f && c != ':' && c != ';'
// xsignedHeaderFields collects header field names into a slice. The first name is
// always parsed; further names are appended. Each name is parsed with
// xhdrName(false).
// NOTE(review): the loop and the separator between names are not visible here;
// presumably ":" with optional FWS around it — confirm.
240func (p *parser) xsignedHeaderFields() []string {
242 l := []string{p.xhdrName(false)}
247 l = append(l, p.xhdrName(false))
// xauid parses an Identity. Two return paths are visible: one yields an Identity
// with only Domain set (parsed by xdomain), the other an Identity built from a
// pointer to a localpart and a domain.
// NOTE(review): how the two cases are told apart is not visible here; presumably
// by whether the input starts with "@" — confirm.
252func (p *parser) xauid() Identity {
254 // Localpart is optional.
256 return Identity{Domain: p.xdomain()}
261 return Identity{&lp, dom}
// xlocalpart parses a localpart and returns it as smtp.Localpart. Input starting
// with a double quote is parsed with xquotedString. The result is limited to 64
// bytes when Pedantic is set and to 128 bytes otherwise.
// NOTE(review): parsing of the unquoted form is not visible here — confirm.
264// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart
265func (p *parser) xlocalpart() smtp.Localpart {
269 if p.hasPrefix(`"`) {
270 s = p.xquotedString()
277 // In the wild, some services use large localparts for generated (bounce) addresses.
278 if Pedantic && len(s) > 64 || len(s) > 128 {
 // NOTE(review): the message mentions 64 octets even when the 128-byte
 // non-pedantic limit is the one exceeded.
280 p.xerrorf("localpart longer than 64 octets")
282 return smtp.Localpart(s)
// xquotedString parses a quoted string and returns a string. Two character
// classes are visible: a class for escaped characters (space up to, but not
// including, 0x7f) and a class for unescaped characters (the same range without
// backslash and double quote, plus characters above 0x7f when p.smtputf8 is set).
// Each class has an "invalid localpart" error reported through xerrorf.
// NOTE(review): the surrounding loop and the handling of the opening and closing
// quotes are not visible here — confirm.
285func (p *parser) xquotedString() string {
292 if c >= ' ' && c < 0x7f {
297 p.xerrorf("invalid localpart, bad escaped char %c", c)
306 if c >= ' ' && c < 0x7f && c != '\\' && c != '"' || (c > 0x7f && p.smtputf8) {
310 p.xerrorf("invalid localpart, invalid character %c", c)
// xchar returns a rune from the unconsumed input. It can raise "need another
// character" through xerrorf, and it passes the bytes p.s[p.o : p.o+o] to track.
// NOTE(review): how o is computed and how the offset advances are not visible
// here; presumably o is the byte length of the first rune, and the error is
// raised when no input remains — confirm.
314func (p *parser) xchar() rune {
315 // We are careful to track invalid utf-8 properly.
317 p.xerrorf("need another character")
321 for i, c := range p.s[p.o:] {
332 p.track(p.s[p.o : p.o+o])
// xatom takes an atom with xtakefn1. Accepted characters are ASCII letters and
// digits and, when p.smtputf8 is set, characters above 0x7f.
// NOTE(review): the body of the punctuation case is not visible here;
// presumably those characters are accepted too — confirm.
338func (p *parser) xatom() string {
339 return p.xtakefn1(false, func(c rune, i int) bool {
341 case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
344 return isalphadigit(c) || (c > 0x7f && p.smtputf8)
348func (p *parser) xbodyLength() int64 {
// xnumber scans ASCII digits at the front of the unconsumed input and parses the
// taken text as a base-10 int64 with strconv.ParseInt. Errors reported through
// xerrorf: "expected digits", "too many digits" and "parsing digits: ...".
// NOTE(review): the checks that trigger the first two errors are not visible
// here; presumably no leading digit, and more than maxdigits digits — confirm.
353func (p *parser) xnumber(maxdigits int) int64 {
355 for i, c := range p.s[p.o:] {
356 if c >= '0' && c <= '9' {
363 p.xerrorf("expected digits")
366 p.xerrorf("too many digits")
368 v, err := strconv.ParseInt(p.xtaken(o+1), 10, 64)
370 p.xerrorf("parsing digits: %s", err)
// xqueryMethods collects query methods into a slice. The first method is always
// parsed; further methods are appended. Each method is parsed with xqtagmethod.
// NOTE(review): the loop and the separator between methods are not visible
// here; presumably ":" — confirm.
375func (p *parser) xqueryMethods() []string {
377 l := []string{p.xqtagmethod()}
381 l = append(l, p.xqtagmethod())
// xqtagmethod parses a single query method: a hyphenated word, optionally
// followed by "/" and arguments. The method "dns" followed by "/txt" (both
// compared case-insensitively) is recognised explicitly; for any other "/", the
// arguments are parsed with xqp with pipe and colon treated as delimiters.
// NOTE(review): what the "dns/txt" branch consumes and appends is not visible
// here — confirm.
386func (p *parser) xqtagmethod() string {
388 s := p.xhyphenatedWord()
389 // ABNF production "x-sig-q-tag-args" should probably just have been
390 // "hyphenated-word". As qp-hdr-value, it will consume ":". A similar problem does
391 // not occur for "z" because it is also "|"-delimited. We work around the potential
392 // issue by parsing "dns/txt" explicitly.
394 if strings.EqualFold(s, "dns") && len(rem) >= len("/txt") && strings.EqualFold(rem[:len("/txt")], "/txt") {
396 } else if p.take("/") {
397 s += "/" + p.xqp(true, true, false)
// isalpha reports whether c is an ASCII letter, 'a'-'z' or 'A'-'Z'.
402func isalpha(c rune) bool {
403 return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z'
// isdigit reports whether c is an ASCII digit, '0'-'9'.
406func isdigit(c rune) bool {
407 return c >= '0' && c <= '9'
// isalphadigit reports whether c is an ASCII letter or an ASCII digit.
410func isalphadigit(c rune) bool {
411 return isalpha(c) || isdigit(c)
// xhyphenatedWord takes a word with xtakefn1. The first character must be an
// ASCII letter. Subsequent characters may be ASCII letters or digits, or a '-'
// that is directly followed in the input by an ASCII letter or digit, so a word
// never ends in '-'.
415func (p *parser) xhyphenatedWord() string {
416 return p.xtakefn1(false, func(c rune, i int) bool {
417 return isalpha(c) || i > 0 && isdigit(c) || i > 0 && c == '-' && p.o+i+1 < len(p.s) && isalphadigit(rune(p.s[p.o+i+1]))
// xqphdrvalue parses a value by calling xqp with pipeEncoded true, colonEncoded
// false and the given ignoreFWS.
422func (p *parser) xqphdrvalue(ignoreFWS bool) string {
423 return p.xqp(true, false, ignoreFWS)
// xqpSection parses a qp-section by calling xqp with pipeEncoded, colonEncoded
// and ignoreFWS all false.
426func (p *parser) xqpSection() string {
427 return p.xqp(false, false, false)
430// xqp parses dkim-quoted-printable (pipeEncoded true) or qp-section.
432// It is described in terms of (lots of) modifications to MIME quoted-printable,
433// but it may be simpler to just ignore that reference.
435// ignoreFWS is required for "z=", which can have FWS anywhere.
// With pipeEncoded set, "|" is excluded from the literal characters, so it must
// appear in encoded form; with colonEncoded set, a ":" at the current position
// is checked for separately. Literal characters are those above space and below
// 0x7f, excluding ';' and '='. An encoded byte is built from two hex characters,
// high nibble first.
// NOTE(review): the hex predicate accepts 'A'-'Z', wider than the hex digits
// 'A'-'F' — confirm this is intended. The loop structure and the handling of "="
// are not visible here.
436func (p *parser) xqp(pipeEncoded, colonEncoded, ignoreFWS bool) string {
439 hex := func(c byte) rune {
440 if c >= '0' && c <= '9' {
443 return rune(10 + c - 'A')
449 if pipeEncoded && p.hasPrefix("|") {
452 if colonEncoded && p.hasPrefix(":") {
456 h := p.xtakefn(ignoreFWS, func(c rune, i int) bool {
457 return i < 2 && (c >= '0' && c <= '9' || c >= 'A' && c <= 'Z')
460 p.xerrorf("expected qp-hdr-value")
462 c := (hex(h[0]) << 4) | hex(h[1])
466 x := p.xtakefn(ignoreFWS, func(c rune, i int) bool {
467 return c > ' ' && c < 0x7f && c != ';' && c != '=' && !(pipeEncoded && c == '|')
477func (p *parser) xtimestamp() int64 {
// xcopiedHeaderFields collects copied header fields into a slice. The first
// field is always parsed with xztagcopy; while the unconsumed input starts with
// "|", another field is parsed and appended.
// NOTE(review): consumption of the "|" separator is not visible here — confirm.
482func (p *parser) xcopiedHeaderFields() []string {
484 l := []string{p.xztagcopy()}
485 for p.hasPrefix("|") {
488 l = append(l, p.xztagcopy())
493func (p *parser) xztagcopy() string {
494 // ABNF does not mention FWS (unlike for other fields), but FWS is allowed everywhere in the value...
496 f := p.xhdrName(true)
499 v := p.xqphdrvalue(true)