1package dkim
2
3import (
4 "encoding/base64"
5 "fmt"
6 "strconv"
7 "strings"
8
9 "github.com/mjl-/mox/dns"
10 "github.com/mjl-/mox/smtp"
11)
12
// Pedantic enables stricter parsing. When set, underscores in selectors are no
// longer accepted and localparts are limited to 64 octets instead of 128.
var Pedantic bool
15
// parseErr is the error value for a parse failure. The string is the complete
// error message.
type parseErr string

// Compile-time check that parseErr implements the error interface.
var _ error = parseErr("")

// Error returns the message, implementing the error interface.
func (e parseErr) Error() string {
	msg := string(e)
	return msg
}
23
// parser holds the state for parsing the string s from left to right. Methods
// with an "x" prefix generally report invalid input by panicking with a parseErr
// through xerrorf.
type parser struct {
	s        string // Text being parsed.
	o        int    // Offset into s.
	tracked  string // All data consumed, except when "drop" is true. To be set by caller when parsing the value for "b=".
	drop     bool   // If set, consumed data is not added to tracked.
	smtputf8 bool   // If set, allow characters > 0x7f.
}
31
32func (p *parser) xerrorf(format string, args ...any) {
33 msg := fmt.Sprintf(format, args...)
34 if p.o < len(p.s) {
35 msg = fmt.Sprintf("%s (leftover %q)", msg, p.s[p.o:])
36 }
37 panic(parseErr(msg))
38}
39
40func (p *parser) track(s string) {
41 if !p.drop {
42 p.tracked += s
43 }
44}
45
46func (p *parser) hasPrefix(s string) bool {
47 return strings.HasPrefix(p.s[p.o:], s)
48}
49
50func (p *parser) xtaken(n int) string {
51 r := p.s[p.o : p.o+n]
52 p.o += n
53 p.track(r)
54 return r
55}
56
57func (p *parser) xtakefn(ignoreFWS bool, fn func(c rune, i int) bool) string {
58 var r string
59 for i, c := range p.s[p.o:] {
60 if !fn(c, i) {
61 switch c {
62 case ' ', '\t', '\r', '\n':
63 continue
64 }
65 p.xtaken(i)
66 return r
67 }
68 r += string(c)
69 }
70 p.xtaken(len(p.s) - p.o)
71 return r
72}
73
74func (p *parser) empty() bool {
75 return p.o >= len(p.s)
76}
77
78func (p *parser) xnonempty() {
79 if p.o >= len(p.s) {
80 p.xerrorf("expected at least 1 more char")
81 }
82}
83
84func (p *parser) xtakefn1(ignoreFWS bool, fn func(c rune, i int) bool) string {
85 var r string
86 p.xnonempty()
87 for i, c := range p.s[p.o:] {
88 if !fn(c, i) {
89 switch c {
90 case ' ', '\t', '\r', '\n':
91 continue
92 }
93 if i == 0 {
94 p.xerrorf("expected at least 1 char")
95 }
96 p.xtaken(i)
97 return r
98 }
99 r += string(c)
100 }
101 return p.xtaken(len(p.s) - p.o)
102}
103
104func (p *parser) wsp() {
105 p.xtakefn(false, func(c rune, i int) bool {
106 return c == ' ' || c == '\t'
107 })
108}
109
110func (p *parser) fws() {
111 p.wsp()
112 if p.hasPrefix("\r\n ") || p.hasPrefix("\r\n\t") {
113 p.xtaken(3)
114 p.wsp()
115 }
116}
117
118// peekfws returns whether remaining text starts with s, optionally prefix with fws.
119func (p *parser) peekfws(s string) bool {
120 o := p.o
121 p.fws()
122 r := p.hasPrefix(s)
123 p.o = o
124 return r
125}
126
127func (p *parser) xtake(s string) string {
128 if !strings.HasPrefix(p.s[p.o:], s) {
129 p.xerrorf("expected %q", s)
130 }
131 return p.xtaken(len(s))
132}
133
134func (p *parser) take(s string) bool {
135 if strings.HasPrefix(p.s[p.o:], s) {
136 p.o += len(s)
137 p.track(s)
138 return true
139 }
140 return false
141}
142
143// ../rfc/6376:657
144func (p *parser) xtagName() string {
145 return p.xtakefn1(false, func(c rune, i int) bool {
146 return isalpha(c) || i > 0 && (isdigit(c) || c == '_')
147 })
148}
149
150func (p *parser) xalgorithm() (string, string) {
151 // ../rfc/6376:1046
152 xtagx := func(c rune, i int) bool {
153 return isalpha(c) || i > 0 && isdigit(c)
154 }
155 algk := p.xtakefn1(false, xtagx)
156 p.xtake("-")
157 algv := p.xtakefn1(false, xtagx)
158 return algk, algv
159}
160
161// fws in value is ignored. empty/no base64 characters is valid.
162// ../rfc/6376:1021
163// ../rfc/6376:1076
164func (p *parser) xbase64() []byte {
165 s := ""
166 p.xtakefn(false, func(c rune, i int) bool {
167 if isalphadigit(c) || c == '+' || c == '/' || c == '=' {
168 s += string(c)
169 return true
170 }
171 if c == ' ' || c == '\t' {
172 return true
173 }
174 rem := p.s[p.o+i:]
175 if strings.HasPrefix(rem, "\r\n ") || strings.HasPrefix(rem, "\r\n\t") {
176 return true
177 }
178 if (strings.HasPrefix(rem, "\n ") || strings.HasPrefix(rem, "\n\t")) && p.o+i-1 > 0 && p.s[p.o+i-1] == '\r' {
179 return true
180 }
181 return false
182 })
183 buf, err := base64.StdEncoding.DecodeString(s)
184 if err != nil {
185 p.xerrorf("decoding base64: %v", err)
186 }
187 return buf
188}
189
190// parses canonicalization in original case.
191func (p *parser) xcanonical() string {
192 // ../rfc/6376:1100
193 s := p.xhyphenatedWord()
194 if p.take("/") {
195 return s + "/" + p.xhyphenatedWord()
196 }
197 return s
198}
199
200func (p *parser) xdomainselector(isselector bool) dns.Domain {
201 subdomain := func(c rune, i int) bool {
202 // domain names must always be a-labels, ../rfc/6376:1115 ../rfc/6376:1187 ../rfc/6376:1303
203 // dkim selectors with underscores happen in the wild, accept them when not in
204 // pedantic mode. ../rfc/6376:581 ../rfc/5321:2303
205 return isalphadigit(c) || (i > 0 && (c == '-' || isselector && !Pedantic && c == '_') && p.o+1 < len(p.s))
206 }
207 s := p.xtakefn1(false, subdomain)
208 for p.hasPrefix(".") {
209 s += p.xtake(".") + p.xtakefn1(false, subdomain)
210 }
211 if isselector {
212 // Not to be interpreted as IDNA.
213 return dns.Domain{ASCII: strings.ToLower(s)}
214 }
215 d, err := dns.ParseDomain(s)
216 if err != nil {
217 p.xerrorf("parsing domain %q: %s", s, err)
218 }
219 return d
220}
221
222func (p *parser) xdomain() dns.Domain {
223 return p.xdomainselector(false)
224}
225
226func (p *parser) xselector() dns.Domain {
227 return p.xdomainselector(true)
228}
229
230func (p *parser) xhdrName(ignoreFWS bool) string {
231 // ../rfc/6376:473
232 // ../rfc/5322:1689
233 // BNF for hdr-name (field-name) allows ";", but DKIM disallows unencoded semicolons. ../rfc/6376:643
234 // ignoreFWS is needed for "z=", which can have FWS anywhere. ../rfc/6376:1372
235 return p.xtakefn1(ignoreFWS, func(c rune, i int) bool {
236 return c > ' ' && c < 0x7f && c != ':' && c != ';'
237 })
238}
239
240func (p *parser) xsignedHeaderFields() []string {
241 // ../rfc/6376:1157
242 l := []string{p.xhdrName(false)}
243 for p.peekfws(":") {
244 p.fws()
245 p.xtake(":")
246 p.fws()
247 l = append(l, p.xhdrName(false))
248 }
249 return l
250}
251
252func (p *parser) xauid() Identity {
253 // ../rfc/6376:1192
254 // Localpart is optional.
255 if p.take("@") {
256 return Identity{Domain: p.xdomain()}
257 }
258 lp := p.xlocalpart()
259 p.xtake("@")
260 dom := p.xdomain()
261 return Identity{&lp, dom}
262}
263
264// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart
265func (p *parser) xlocalpart() smtp.Localpart {
266 // ../rfc/6376:434
267 // ../rfc/5321:2316
268 var s string
269 if p.hasPrefix(`"`) {
270 s = p.xquotedString()
271 } else {
272 s = p.xatom()
273 for p.take(".") {
274 s += "." + p.xatom()
275 }
276 }
277 // In the wild, some services use large localparts for generated (bounce) addresses.
278 if Pedantic && len(s) > 64 || len(s) > 128 {
279 // ../rfc/5321:3486
280 p.xerrorf("localpart longer than 64 octets")
281 }
282 return smtp.Localpart(s)
283}
284
285func (p *parser) xquotedString() string {
286 p.xtake(`"`)
287 var s string
288 var esc bool
289 for {
290 c := p.xchar()
291 if esc {
292 if c >= ' ' && c < 0x7f {
293 s += string(c)
294 esc = false
295 continue
296 }
297 p.xerrorf("invalid localpart, bad escaped char %c", c)
298 }
299 if c == '\\' {
300 esc = true
301 continue
302 }
303 if c == '"' {
304 return s
305 }
306 if c >= ' ' && c < 0x7f && c != '\\' && c != '"' || (c > 0x7f && p.smtputf8) {
307 s += string(c)
308 continue
309 }
310 p.xerrorf("invalid localpart, invalid character %c", c)
311 }
312}
313
314func (p *parser) xchar() rune {
315 // We are careful to track invalid utf-8 properly.
316 if p.empty() {
317 p.xerrorf("need another character")
318 }
319 var r rune
320 var o int
321 for i, c := range p.s[p.o:] {
322 if i > 0 {
323 o = i
324 break
325 }
326 r = c
327 }
328 if o == 0 {
329 p.track(p.s[p.o:])
330 p.o = len(p.s)
331 } else {
332 p.track(p.s[p.o : p.o+o])
333 p.o += o
334 }
335 return r
336}
337
338func (p *parser) xatom() string {
339 return p.xtakefn1(false, func(c rune, i int) bool {
340 switch c {
341 case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
342 return true
343 }
344 return isalphadigit(c) || (c > 0x7f && p.smtputf8)
345 })
346}
347
348func (p *parser) xbodyLength() int64 {
349 // ../rfc/6376:1265
350 return p.xnumber(76)
351}
352
353func (p *parser) xnumber(maxdigits int) int64 {
354 o := -1
355 for i, c := range p.s[p.o:] {
356 if c >= '0' && c <= '9' {
357 o = i
358 } else {
359 break
360 }
361 }
362 if o == -1 {
363 p.xerrorf("expected digits")
364 }
365 if o+1 > maxdigits {
366 p.xerrorf("too many digits")
367 }
368 v, err := strconv.ParseInt(p.xtaken(o+1), 10, 64)
369 if err != nil {
370 p.xerrorf("parsing digits: %s", err)
371 }
372 return v
373}
374
375func (p *parser) xqueryMethods() []string {
376 // ../rfc/6376:1285
377 l := []string{p.xqtagmethod()}
378 for p.peekfws(":") {
379 p.fws()
380 p.xtake(":")
381 l = append(l, p.xqtagmethod())
382 }
383 return l
384}
385
386func (p *parser) xqtagmethod() string {
387 // ../rfc/6376:1295 ../rfc/6376-eid4810
388 s := p.xhyphenatedWord()
389 // ABNF production "x-sig-q-tag-args" should probably just have been
390 // "hyphenated-word". As qp-hdr-value, it will consume ":". A similar problem does
391 // not occur for "z" because it is also "|"-delimited. We work around the potential
392 // issue by parsing "dns/txt" explicitly.
393 rem := p.s[p.o:]
394 if strings.EqualFold(s, "dns") && len(rem) >= len("/txt") && strings.EqualFold(rem[:len("/txt")], "/txt") {
395 s += p.xtaken(4)
396 } else if p.take("/") {
397 s += "/" + p.xqp(true, true, false)
398 }
399 return s
400}
401
// isalpha returns whether c is an ASCII letter.
func isalpha(c rune) bool {
	switch {
	case 'a' <= c && c <= 'z':
		return true
	case 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
405
// isdigit returns whether c is an ASCII digit.
func isdigit(c rune) bool {
	if c < '0' {
		return false
	}
	return c <= '9'
}
409
410func isalphadigit(c rune) bool {
411 return isalpha(c) || isdigit(c)
412}
413
414// ../rfc/6376:469
415func (p *parser) xhyphenatedWord() string {
416 return p.xtakefn1(false, func(c rune, i int) bool {
417 return isalpha(c) || i > 0 && isdigit(c) || i > 0 && c == '-' && p.o+i+1 < len(p.s) && isalphadigit(rune(p.s[p.o+i+1]))
418 })
419}
420
421// ../rfc/6376:474
422func (p *parser) xqphdrvalue(ignoreFWS bool) string {
423 return p.xqp(true, false, ignoreFWS)
424}
425
426func (p *parser) xqpSection() string {
427 return p.xqp(false, false, false)
428}
429
430// dkim-quoted-printable (pipeEncoded true) or qp-section.
431//
432// It is described in terms of (lots of) modifications to MIME quoted-printable,
433// but it may be simpler to just ignore that reference.
434//
435// ignoreFWS is required for "z=", which can have FWS anywhere.
436func (p *parser) xqp(pipeEncoded, colonEncoded, ignoreFWS bool) string {
437 // ../rfc/6376:494 ../rfc/2045:1260
438
439 hex := func(c byte) rune {
440 if c >= '0' && c <= '9' {
441 return rune(c - '0')
442 }
443 return rune(10 + c - 'A')
444 }
445
446 s := ""
447 for !p.empty() {
448 p.fws()
449 if pipeEncoded && p.hasPrefix("|") {
450 break
451 }
452 if colonEncoded && p.hasPrefix(":") {
453 break
454 }
455 if p.take("=") {
456 h := p.xtakefn(ignoreFWS, func(c rune, i int) bool {
457 return i < 2 && (c >= '0' && c <= '9' || c >= 'A' && c <= 'Z')
458 })
459 if len(h) != 2 {
460 p.xerrorf("expected qp-hdr-value")
461 }
462 c := (hex(h[0]) << 4) | hex(h[1])
463 s += string(c)
464 continue
465 }
466 x := p.xtakefn(ignoreFWS, func(c rune, i int) bool {
467 return c > ' ' && c < 0x7f && c != ';' && c != '=' && !(pipeEncoded && c == '|')
468 })
469 if x == "" {
470 break
471 }
472 s += x
473 }
474 return s
475}
476
477func (p *parser) xtimestamp() int64 {
478 // ../rfc/6376:1325 ../rfc/6376:1358
479 return p.xnumber(12)
480}
481
482func (p *parser) xcopiedHeaderFields() []string {
483 // ../rfc/6376:1384
484 l := []string{p.xztagcopy()}
485 for p.hasPrefix("|") {
486 p.xtake("|")
487 p.fws()
488 l = append(l, p.xztagcopy())
489 }
490 return l
491}
492
493func (p *parser) xztagcopy() string {
494 // ABNF does not mention FWS (unlike for other fields), but FWS is allowed everywhere in the value...
495 // ../rfc/6376:1386 ../rfc/6376:1372
496 f := p.xhdrName(true)
497 p.fws()
498 p.xtake(":")
499 v := p.xqphdrvalue(true)
500 return f + ":" + v
501}
502