1package dkim
2
3import (
4 "encoding/base64"
5 "fmt"
6 "strconv"
7 "strings"
8
9 "golang.org/x/text/unicode/norm"
10
11 "github.com/mjl-/mox/dns"
12 "github.com/mjl-/mox/smtp"
13)
14
// Pedantic enables stricter parsing. In this parser it makes selectors with
// underscores invalid, and limits localparts to 64 octets instead of 128.
var Pedantic bool
17
// parseErr is the error value the parser panics with on malformed input. The
// string is the complete error message.
type parseErr string

// Compile-time check that parseErr implements error.
var _ error = parseErr("")

// Error returns the message unchanged.
func (msg parseErr) Error() string {
	return string(msg)
}
25
// parser holds the state for parsing a single string. Methods report malformed
// input by panicking with a parseErr, through xerrorf.
type parser struct {
	s        string // Text being parsed.
	o        int    // Offset into s.
	tracked  string // All data consumed, except when "drop" is true. To be set by caller when parsing the value for "b=".
	drop     bool   // While set, consumed data is not appended to tracked.
	smtputf8 bool   // If set, allow characters > 0x7f.
}
33
34func (p *parser) xerrorf(format string, args ...any) {
35 msg := fmt.Sprintf(format, args...)
36 if p.o < len(p.s) {
37 msg = fmt.Sprintf("%s (leftover %q)", msg, p.s[p.o:])
38 }
39 panic(parseErr(msg))
40}
41
42func (p *parser) track(s string) {
43 if !p.drop {
44 p.tracked += s
45 }
46}
47
48func (p *parser) hasPrefix(s string) bool {
49 return strings.HasPrefix(p.s[p.o:], s)
50}
51
52func (p *parser) xtaken(n int) string {
53 r := p.s[p.o : p.o+n]
54 p.o += n
55 p.track(r)
56 return r
57}
58
59func (p *parser) xtakefn(ignoreFWS bool, fn func(c rune, i int) bool) string {
60 var r string
61 for i, c := range p.s[p.o:] {
62 if !fn(c, i) {
63 switch c {
64 case ' ', '\t', '\r', '\n':
65 continue
66 }
67 p.xtaken(i)
68 return r
69 }
70 r += string(c)
71 }
72 p.xtaken(len(p.s) - p.o)
73 return r
74}
75
76func (p *parser) empty() bool {
77 return p.o >= len(p.s)
78}
79
80func (p *parser) xnonempty() {
81 if p.o >= len(p.s) {
82 p.xerrorf("expected at least 1 more char")
83 }
84}
85
86func (p *parser) xtakefn1(ignoreFWS bool, fn func(c rune, i int) bool) string {
87 var r string
88 p.xnonempty()
89 for i, c := range p.s[p.o:] {
90 if !fn(c, i) {
91 switch c {
92 case ' ', '\t', '\r', '\n':
93 continue
94 }
95 if i == 0 {
96 p.xerrorf("expected at least 1 char")
97 }
98 p.xtaken(i)
99 return r
100 }
101 r += string(c)
102 }
103 return p.xtaken(len(p.s) - p.o)
104}
105
106func (p *parser) wsp() {
107 p.xtakefn(false, func(c rune, i int) bool {
108 return c == ' ' || c == '\t'
109 })
110}
111
112func (p *parser) fws() {
113 p.wsp()
114 if p.hasPrefix("\r\n ") || p.hasPrefix("\r\n\t") {
115 p.xtaken(3)
116 p.wsp()
117 }
118}
119
120// peekfws returns whether remaining text starts with s, optionally prefix with fws.
121func (p *parser) peekfws(s string) bool {
122 o := p.o
123 p.fws()
124 r := p.hasPrefix(s)
125 p.o = o
126 return r
127}
128
129func (p *parser) xtake(s string) string {
130 if !strings.HasPrefix(p.s[p.o:], s) {
131 p.xerrorf("expected %q", s)
132 }
133 return p.xtaken(len(s))
134}
135
136func (p *parser) take(s string) bool {
137 if strings.HasPrefix(p.s[p.o:], s) {
138 p.o += len(s)
139 p.track(s)
140 return true
141 }
142 return false
143}
144
145// ../rfc/6376:657
146func (p *parser) xtagName() string {
147 return p.xtakefn1(false, func(c rune, i int) bool {
148 return isalpha(c) || i > 0 && (isdigit(c) || c == '_')
149 })
150}
151
152func (p *parser) xalgorithm() (string, string) {
153 // ../rfc/6376:1046
154 xtagx := func(c rune, i int) bool {
155 return isalpha(c) || i > 0 && isdigit(c)
156 }
157 algk := p.xtakefn1(false, xtagx)
158 p.xtake("-")
159 algv := p.xtakefn1(false, xtagx)
160 return algk, algv
161}
162
163// fws in value is ignored. empty/no base64 characters is valid.
164// ../rfc/6376:1021
165// ../rfc/6376:1076
166func (p *parser) xbase64() []byte {
167 s := ""
168 p.xtakefn(false, func(c rune, i int) bool {
169 if isalphadigit(c) || c == '+' || c == '/' || c == '=' {
170 s += string(c)
171 return true
172 }
173 if c == ' ' || c == '\t' {
174 return true
175 }
176 rem := p.s[p.o+i:]
177 if strings.HasPrefix(rem, "\r\n ") || strings.HasPrefix(rem, "\r\n\t") {
178 return true
179 }
180 if (strings.HasPrefix(rem, "\n ") || strings.HasPrefix(rem, "\n\t")) && p.o+i-1 > 0 && p.s[p.o+i-1] == '\r' {
181 return true
182 }
183 return false
184 })
185 buf, err := base64.StdEncoding.DecodeString(s)
186 if err != nil {
187 p.xerrorf("decoding base64: %v", err)
188 }
189 return buf
190}
191
192// parses canonicalization in original case.
193func (p *parser) xcanonical() string {
194 // ../rfc/6376:1100
195 s := p.xhyphenatedWord()
196 if p.take("/") {
197 return s + "/" + p.xhyphenatedWord()
198 }
199 return s
200}
201
202func (p *parser) xdomainselector(isselector bool) dns.Domain {
203 subdomain := func(c rune, i int) bool {
204 // domain names must always be a-labels, ../rfc/6376:1115 ../rfc/6376:1187 ../rfc/6376:1303
205 // dkim selectors with underscores happen in the wild, accept them when not in
206 // pedantic mode. ../rfc/6376:581 ../rfc/5321:2303
207 return isalphadigit(c) || (i > 0 && (c == '-' || isselector && !Pedantic && c == '_') && p.o+1 < len(p.s))
208 }
209 s := p.xtakefn1(false, subdomain)
210 for p.hasPrefix(".") {
211 s += p.xtake(".") + p.xtakefn1(false, subdomain)
212 }
213 if isselector {
214 // Not to be interpreted as IDNA.
215 return dns.Domain{ASCII: strings.ToLower(s)}
216 }
217 d, err := dns.ParseDomain(s)
218 if err != nil {
219 p.xerrorf("parsing domain %q: %s", s, err)
220 }
221 return d
222}
223
224func (p *parser) xdomain() dns.Domain {
225 return p.xdomainselector(false)
226}
227
228func (p *parser) xselector() dns.Domain {
229 return p.xdomainselector(true)
230}
231
232func (p *parser) xhdrName(ignoreFWS bool) string {
233 // ../rfc/6376:473
234 // ../rfc/5322:1689
235 // BNF for hdr-name (field-name) allows ";", but DKIM disallows unencoded semicolons. ../rfc/6376:643
236 // ignoreFWS is needed for "z=", which can have FWS anywhere. ../rfc/6376:1372
237 return p.xtakefn1(ignoreFWS, func(c rune, i int) bool {
238 return c > ' ' && c < 0x7f && c != ':' && c != ';'
239 })
240}
241
242func (p *parser) xsignedHeaderFields() []string {
243 // ../rfc/6376:1157
244 l := []string{p.xhdrName(false)}
245 for p.peekfws(":") {
246 p.fws()
247 p.xtake(":")
248 p.fws()
249 l = append(l, p.xhdrName(false))
250 }
251 return l
252}
253
254func (p *parser) xauid() Identity {
255 // ../rfc/6376:1192
256 // Localpart is optional.
257 if p.take("@") {
258 return Identity{Domain: p.xdomain()}
259 }
260 lp := p.xlocalpart()
261 p.xtake("@")
262 dom := p.xdomain()
263 return Identity{&lp, dom}
264}
265
266// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart
267func (p *parser) xlocalpart() smtp.Localpart {
268 // ../rfc/6376:434
269 // ../rfc/5321:2316
270 var s string
271 if p.hasPrefix(`"`) {
272 s = p.xquotedString()
273 } else {
274 s = p.xatom()
275 for p.take(".") {
276 s += "." + p.xatom()
277 }
278 }
279 // In the wild, some services use large localparts for generated (bounce) addresses.
280 if Pedantic && len(s) > 64 || len(s) > 128 {
281 // ../rfc/5321:3486
282 p.xerrorf("localpart longer than 64 octets")
283 }
284 return smtp.Localpart(norm.NFC.String(s))
285}
286
287func (p *parser) xquotedString() string {
288 p.xtake(`"`)
289 var s string
290 var esc bool
291 for {
292 c := p.xchar()
293 if esc {
294 if c >= ' ' && c < 0x7f {
295 s += string(c)
296 esc = false
297 continue
298 }
299 p.xerrorf("invalid localpart, bad escaped char %c", c)
300 }
301 if c == '\\' {
302 esc = true
303 continue
304 }
305 if c == '"' {
306 return s
307 }
308 if c >= ' ' && c < 0x7f && c != '\\' && c != '"' || (c > 0x7f && p.smtputf8) {
309 s += string(c)
310 continue
311 }
312 p.xerrorf("invalid localpart, invalid character %c", c)
313 }
314}
315
316func (p *parser) xchar() rune {
317 // We are careful to track invalid utf-8 properly.
318 if p.empty() {
319 p.xerrorf("need another character")
320 }
321 var r rune
322 var o int
323 for i, c := range p.s[p.o:] {
324 if i > 0 {
325 o = i
326 break
327 }
328 r = c
329 }
330 if o == 0 {
331 p.track(p.s[p.o:])
332 p.o = len(p.s)
333 } else {
334 p.track(p.s[p.o : p.o+o])
335 p.o += o
336 }
337 return r
338}
339
340func (p *parser) xatom() string {
341 return p.xtakefn1(false, func(c rune, i int) bool {
342 switch c {
343 case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
344 return true
345 }
346 return isalphadigit(c) || (c > 0x7f && p.smtputf8)
347 })
348}
349
350func (p *parser) xbodyLength() int64 {
351 // ../rfc/6376:1265
352 return p.xnumber(76)
353}
354
355func (p *parser) xnumber(maxdigits int) int64 {
356 o := -1
357 for i, c := range p.s[p.o:] {
358 if c >= '0' && c <= '9' {
359 o = i
360 } else {
361 break
362 }
363 }
364 if o == -1 {
365 p.xerrorf("expected digits")
366 }
367 if o+1 > maxdigits {
368 p.xerrorf("too many digits")
369 }
370 v, err := strconv.ParseInt(p.xtaken(o+1), 10, 64)
371 if err != nil {
372 p.xerrorf("parsing digits: %s", err)
373 }
374 return v
375}
376
377func (p *parser) xqueryMethods() []string {
378 // ../rfc/6376:1285
379 l := []string{p.xqtagmethod()}
380 for p.peekfws(":") {
381 p.fws()
382 p.xtake(":")
383 l = append(l, p.xqtagmethod())
384 }
385 return l
386}
387
388func (p *parser) xqtagmethod() string {
389 // ../rfc/6376:1295 ../rfc/6376-eid4810
390 s := p.xhyphenatedWord()
391 // ABNF production "x-sig-q-tag-args" should probably just have been
392 // "hyphenated-word". As qp-hdr-value, it will consume ":". A similar problem does
393 // not occur for "z" because it is also "|"-delimited. We work around the potential
394 // issue by parsing "dns/txt" explicitly.
395 rem := p.s[p.o:]
396 if strings.EqualFold(s, "dns") && len(rem) >= len("/txt") && strings.EqualFold(rem[:len("/txt")], "/txt") {
397 s += p.xtaken(4)
398 } else if p.take("/") {
399 s += "/" + p.xqp(true, true, false)
400 }
401 return s
402}
403
// isalpha reports whether c is an ASCII letter.
func isalpha(c rune) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
407
// isdigit reports whether c is an ASCII digit.
func isdigit(c rune) bool {
	return '0' <= c && c <= '9'
}
411
412func isalphadigit(c rune) bool {
413 return isalpha(c) || isdigit(c)
414}
415
416// ../rfc/6376:469
417func (p *parser) xhyphenatedWord() string {
418 return p.xtakefn1(false, func(c rune, i int) bool {
419 return isalpha(c) || i > 0 && isdigit(c) || i > 0 && c == '-' && p.o+i+1 < len(p.s) && isalphadigit(rune(p.s[p.o+i+1]))
420 })
421}
422
423// ../rfc/6376:474
424func (p *parser) xqphdrvalue(ignoreFWS bool) string {
425 return p.xqp(true, false, ignoreFWS)
426}
427
428func (p *parser) xqpSection() string {
429 return p.xqp(false, false, false)
430}
431
432// dkim-quoted-printable (pipeEncoded true) or qp-section.
433//
434// It is described in terms of (lots of) modifications to MIME quoted-printable,
435// but it may be simpler to just ignore that reference.
436//
437// ignoreFWS is required for "z=", which can have FWS anywhere.
438func (p *parser) xqp(pipeEncoded, colonEncoded, ignoreFWS bool) string {
439 // ../rfc/6376:494 ../rfc/2045:1260
440
441 hex := func(c byte) rune {
442 if c >= '0' && c <= '9' {
443 return rune(c - '0')
444 }
445 return rune(10 + c - 'A')
446 }
447
448 s := ""
449 for !p.empty() {
450 p.fws()
451 if pipeEncoded && p.hasPrefix("|") {
452 break
453 }
454 if colonEncoded && p.hasPrefix(":") {
455 break
456 }
457 if p.take("=") {
458 h := p.xtakefn(ignoreFWS, func(c rune, i int) bool {
459 return i < 2 && (c >= '0' && c <= '9' || c >= 'A' && c <= 'Z')
460 })
461 if len(h) != 2 {
462 p.xerrorf("expected qp-hdr-value")
463 }
464 c := (hex(h[0]) << 4) | hex(h[1])
465 s += string(c)
466 continue
467 }
468 x := p.xtakefn(ignoreFWS, func(c rune, i int) bool {
469 return c > ' ' && c < 0x7f && c != ';' && c != '=' && !(pipeEncoded && c == '|')
470 })
471 if x == "" {
472 break
473 }
474 s += x
475 }
476 return s
477}
478
479func (p *parser) xtimestamp() int64 {
480 // ../rfc/6376:1325 ../rfc/6376:1358
481 return p.xnumber(12)
482}
483
484func (p *parser) xcopiedHeaderFields() []string {
485 // ../rfc/6376:1384
486 l := []string{p.xztagcopy()}
487 for p.hasPrefix("|") {
488 p.xtake("|")
489 p.fws()
490 l = append(l, p.xztagcopy())
491 }
492 return l
493}
494
495func (p *parser) xztagcopy() string {
496 // ABNF does not mention FWS (unlike for other fields), but FWS is allowed everywhere in the value...
497 // ../rfc/6376:1386 ../rfc/6376:1372
498 f := p.xhdrName(true)
499 p.fws()
500 p.xtake(":")
501 v := p.xqphdrvalue(true)
502 return f + ":" + v
503}
504