1package smtpserver
2
3import (
4 "errors"
5 "fmt"
6 "net"
7 "strconv"
8 "strings"
9 "time"
10
11 "golang.org/x/text/unicode/norm"
12
13 "github.com/mjl-/mox/dns"
14 "github.com/mjl-/mox/mox-"
15 "github.com/mjl-/mox/smtp"
16)
17
// parser holds the original string and a copy of it with ascii a-z upper-cased,
// for easy case-insensitive parsing. Both strings have the same length, so a
// single offset is valid for either of them.
type parser struct {
	orig              string // Input as passed to newParser, case preserved.
	upper             string // orig with ascii a-z upper-cased, used for matching.
	o                 int    // Offset into orig/upper.
	smtputf8          bool   // Whether SMTPUTF8 extension is enabled, making IDNA domains and utf8 localparts valid.
	conn              *conn  // Connection the parser was created for; its account and submission fields are consulted.
	utf8LocalpartCode int    // If non-zero, error for utf-8 localpart when smtputf8 not enabled.
}
28
// toUpper returns s with the ascii bytes a-z replaced by A-Z. All other bytes,
// including bytes that are not valid utf-8, are copied unchanged, so the result
// always has the same length as s and offsets into s and the result point to the
// same character. strings.ToUpper does too much for this purpose: it would
// replace invalid bytes with unicode replacement characters and break that
// offset requirement.
func toUpper(s string) string {
	buf := make([]byte, len(s))
	for i := 0; i < len(s); i++ {
		b := s[i]
		if 'a' <= b && b <= 'z' {
			b -= 'a' - 'A'
		}
		buf[i] = b
	}
	return string(buf)
}
42
43func newParser(s string, smtputf8 bool, conn *conn) *parser {
44 return &parser{orig: s, upper: toUpper(s), smtputf8: smtputf8, conn: conn}
45}
46
47func (p *parser) xerrorf(format string, args ...any) {
48 // For submission, send the remaining unparsed line. Otherwise, only log it.
49 var err error
50 errmsg := "bad syntax: " + fmt.Sprintf(format, args...)
51 remaining := fmt.Sprintf(" (remaining %q)", p.orig[p.o:])
52 if p.conn.account != nil {
53 errmsg += remaining
54 err = errors.New(errmsg)
55 } else {
56 err = errors.New(errmsg + remaining)
57 }
58
59 // ../rfc/5321:2377
60 panic(smtpError{smtp.C501BadParamSyntax, smtp.SeProto5Syntax2, errmsg, err, false, true})
61}
62
63func (p *parser) xutf8localparterrorf() {
64 code := p.utf8LocalpartCode
65 if code == 0 {
66 code = smtp.C550MailboxUnavail
67 }
68 // ../rfc/6531:466
69 xsmtpUserErrorf(code, smtp.SeMsg6NonASCIIAddrNotPermitted7, "non-ascii address not permitted without smtputf8")
70}
71
72func (p *parser) empty() bool {
73 return p.o == len(p.orig)
74}
75
76// note: use xend() for check for end of line with remaining white space, to be used by commands.
77func (p *parser) xempty() {
78 if p.o != len(p.orig) {
79 p.xerrorf("expected end of line")
80 }
81}
82
83// check we are at the end of a command.
84func (p *parser) xend() {
85 // For submission, we are strict.
86 if p.conn.submission {
87 p.xempty()
88 }
89 // Otherwise we allow trailing white space. ../rfc/5321:1758
90 rem := p.remainder()
91 for _, c := range rem {
92 if c != ' ' && c != '\t' {
93 p.xerrorf("trailing data, not white space: %q", rem)
94 }
95 }
96}
97
98func (p *parser) hasPrefix(s string) bool {
99 return strings.HasPrefix(p.upper[p.o:], s)
100}
101
102func (p *parser) take(s string) bool {
103 if p.hasPrefix(s) {
104 p.o += len(s)
105 return true
106 }
107 return false
108}
109
110func (p *parser) xtake(s string) {
111 if !p.take(s) {
112 p.xerrorf("expected %q", s)
113 }
114}
115
116func (p *parser) space() bool {
117 return p.take(" ")
118}
119
120func (p *parser) xspace() {
121 p.xtake(" ")
122}
123
124func (p *parser) xtaken(n int) string {
125 r := p.orig[p.o : p.o+n]
126 p.o += n
127 return r
128}
129
130func (p *parser) remainder() string {
131 r := p.orig[p.o:]
132 p.o = len(p.orig)
133 return r
134}
135
136func (p *parser) peekchar() rune {
137 for _, c := range p.upper[p.o:] {
138 return c
139 }
140 return -1
141}
142
143func (p *parser) xtakefn1(what string, fn func(c rune, i int) bool) string {
144 if p.empty() {
145 p.xerrorf("need at least one char for %s", what)
146 }
147 for i, c := range p.upper[p.o:] {
148 if !fn(c, i) {
149 if i == 0 {
150 p.xerrorf("expected at least one char for %s", what)
151 }
152 return p.xtaken(i)
153 }
154 }
155 return p.remainder()
156}
157
158func (p *parser) xtakefn1case(what string, fn func(c rune, i int) bool) string {
159 if p.empty() {
160 p.xerrorf("need at least one char for %s", what)
161 }
162 for i, c := range p.orig[p.o:] {
163 if !fn(c, i) {
164 if i == 0 {
165 p.xerrorf("expected at least one char for %s", what)
166 }
167 return p.xtaken(i)
168 }
169 }
170 return p.remainder()
171}
172
173func (p *parser) xtakefn(fn func(c rune, i int) bool) string {
174 for i, c := range p.upper[p.o:] {
175 if !fn(c, i) {
176 return p.xtaken(i)
177 }
178 }
179 return p.remainder()
180}
181
182// xrawReversePath returns the raw string between the <>'s. We cannot parse it
183// immediately, because if this is an IDNA (internationalization) address, we would
184// only see the SMTPUTF8 indicator after having parsed the reverse path here. So we
185// parse the raw data here, and validate it after having seen all parameters.
186// ../rfc/5321:2260
187func (p *parser) xrawReversePath() string {
188 p.xtake("<")
189 s := p.xtakefn(func(c rune, i int) bool {
190 return c != '>'
191 })
192 p.xtake(">")
193 return s
194}
195
196// xbareReversePath parses a reverse-path without <>, as returned by
197// xrawReversePath. It takes smtputf8 into account.
198// ../rfc/5321:2260
199func (p *parser) xbareReversePath() smtp.Path {
200 if p.empty() {
201 return smtp.Path{}
202 }
203 // ../rfc/6531:468
204 p.utf8LocalpartCode = smtp.C550MailboxUnavail
205 defer func() {
206 p.utf8LocalpartCode = 0
207 }()
208 return p.xbarePath()
209}
210
211func (p *parser) xforwardPath() smtp.Path {
212 // ../rfc/6531:466
213 p.utf8LocalpartCode = smtp.C553BadMailbox
214 defer func() {
215 p.utf8LocalpartCode = 0
216 }()
217 return p.xpath()
218}
219
220// ../rfc/5321:2264
221func (p *parser) xpath() smtp.Path {
222 o := p.o
223 p.xtake("<")
224 r := p.xbarePath()
225 p.xtake(">")
226 if p.o-o > 256 {
227 // ../rfc/5321:3495
228 p.xerrorf("path longer than 256 octets")
229 }
230 return r
231}
232
233func (p *parser) xbarePath() smtp.Path {
234 // We parse but ignore any source routing.
235 // ../rfc/5321:1081 ../rfc/5321:1430 ../rfc/5321:1925
236 if p.take("@") {
237 p.xdomain()
238 for p.take(",") {
239 p.xtake("@")
240 p.xdomain()
241 }
242 p.xtake(":")
243 }
244 return p.xmailbox()
245}
246
247// ../rfc/5321:2291
248func (p *parser) xdomain() dns.Domain {
249 s := p.xsubdomain()
250 for p.take(".") {
251 s += "." + p.xsubdomain()
252 }
253 d, err := dns.ParseDomain(s)
254 if err != nil {
255 p.xerrorf("parsing domain name %q: %s", s, err)
256 }
257 if len(s) > 255 {
258 // ../rfc/5321:3491
259 p.xerrorf("domain longer than 255 octets")
260 }
261 return d
262}
263
264// ../rfc/5321:2303 ../rfc/6531:411
265func (p *parser) xsubdomain() string {
266 return p.xtakefn1("subdomain", func(c rune, i int) bool {
267 return c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || i > 0 && c == '-' || c > 0x7f && p.smtputf8
268 })
269}
270
271// ../rfc/5321:2314
272func (p *parser) xmailbox() smtp.Path {
273 localpart := p.xlocalpart()
274 p.xtake("@")
275 return smtp.Path{Localpart: localpart, IPDomain: p.xipdomain(false)}
276}
277
278// ../rfc/5321:2307
279func (p *parser) xldhstr() string {
280 s := p.xtakefn1("ldh-str", func(c rune, i int) bool {
281 return c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '-'
282 })
283 if s == "-" {
284 p.xerrorf("empty ldh-str")
285 } else if strings.HasSuffix(s, "-") {
286 p.o--
287 s = s[:len(s)-1]
288 }
289 return s
290}
291
292// parse address-literal or domain.
293func (p *parser) xipdomain(isehlo bool) dns.IPDomain {
294 // ../rfc/5321:2309
295 // ../rfc/5321:2397
296 if p.take("[") {
297 c := p.peekchar()
298 var ipv6 bool
299 if !(c >= '0' && c <= '9') {
300 addrlit := p.xldhstr()
301 p.xtake(":")
302 if !strings.EqualFold(addrlit, "IPv6") {
303 p.xerrorf("unrecognized address literal %q", addrlit)
304 }
305 ipv6 = true
306 }
307 ipaddr := p.xtakefn1("address literal", func(c rune, i int) bool {
308 return c != ']'
309 })
310 p.take("]")
311 ip := net.ParseIP(ipaddr)
312 if ip == nil {
313 p.xerrorf("invalid ip in address: %q", ipaddr)
314 }
315 isv4 := ip.To4() != nil
316 isAllowedSloppyIPv6Submission := func() bool {
317 // Mail user agents that submit are relatively likely to use IPs in EHLO and forget
318 // that an IPv6 address needs to be tagged as such. We can forgive them. For
319 // SMTP servers we are strict.
320 return isehlo && p.conn.submission && !mox.Pedantic && ip.To16() != nil
321 }
322 if ipv6 && isv4 {
323 p.xerrorf("ip address is not ipv6")
324 } else if !ipv6 && !isv4 && !isAllowedSloppyIPv6Submission() {
325 if ip.To16() != nil {
326 p.xerrorf("ip address is ipv6, must use syntax [IPv6:...]")
327 } else {
328 p.xerrorf("ip address is not ipv4")
329 }
330 }
331 return dns.IPDomain{IP: ip}
332 }
333 return dns.IPDomain{Domain: p.xdomain()}
334}
335
336// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart
337func (p *parser) xlocalpart() smtp.Localpart {
338 // ../rfc/5321:2316
339 var s string
340 if p.hasPrefix(`"`) {
341 s = p.xquotedString(true)
342 } else {
343 s = p.xatom(true)
344 for p.take(".") {
345 s += "." + p.xatom(true)
346 }
347 }
348 // In the wild, some services use large localparts for generated (bounce) addresses.
349 if mox.Pedantic && len(s) > 64 || len(s) > 128 {
350 // ../rfc/5321:3486
351 p.xerrorf("localpart longer than 64 octets")
352 }
353 return smtp.Localpart(norm.NFC.String(s))
354}
355
356// ../rfc/5321:2324
357func (p *parser) xquotedString(islocalpart bool) string {
358 p.xtake(`"`)
359 var s string
360 var esc bool
361 for {
362 c := p.xchar()
363 if esc {
364 if c >= ' ' && c < 0x7f {
365 s += string(c)
366 esc = false
367 continue
368 }
369 p.xerrorf("invalid localpart, bad escaped char %c", c)
370 }
371 if c == '\\' {
372 esc = true
373 continue
374 }
375 if c == '"' {
376 return s
377 }
378 // ../rfc/5321:2332 ../rfc/6531:419
379 if islocalpart && c > 0x7f && !p.smtputf8 {
380 p.xutf8localparterrorf()
381 }
382 if c >= ' ' && c < 0x7f && c != '\\' && c != '"' || (c > 0x7f && p.smtputf8) {
383 s += string(c)
384 continue
385 }
386 p.xerrorf("invalid localpart, invalid character %c", c)
387 }
388}
389
390func (p *parser) xchar() rune {
391 // We are careful to track invalid utf-8 properly.
392 if p.empty() {
393 p.xerrorf("need another character")
394 }
395 var r rune
396 var o int
397 for i, c := range p.orig[p.o:] {
398 if i > 0 {
399 o = i
400 break
401 }
402 r = c
403 }
404 if o == 0 {
405 p.o = len(p.orig)
406 } else {
407 p.o += o
408 }
409 return r
410}
411
412// ../rfc/5321:2320 ../rfc/6531:414
413func (p *parser) xatom(islocalpart bool) string {
414 return p.xtakefn1("atom", func(c rune, i int) bool {
415 switch c {
416 case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
417 return true
418 }
419 if islocalpart && c > 0x7f && !p.smtputf8 {
420 p.xutf8localparterrorf()
421 }
422 return c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || (c > 0x7f && p.smtputf8)
423 })
424}
425
426// ../rfc/5321:2338
427func (p *parser) xstring() string {
428 if p.peekchar() == '"' {
429 return p.xquotedString(false)
430 }
431 return p.xatom(false)
432}
433
434// ../rfc/5321:2279
435func (p *parser) xparamKeyword() string {
436 return p.xtakefn1("parameter keyword", func(c rune, i int) bool {
437 return c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || (i > 0 && c == '-')
438 })
439}
440
441// ../rfc/5321:2281 ../rfc/6531:422
442func (p *parser) xparamValue() string {
443 return p.xtakefn1("parameter value", func(c rune, i int) bool {
444 return c > ' ' && c < 0x7f && c != '=' || (c > 0x7f && p.smtputf8)
445 })
446}
447
448// for smtp parameters that take a numeric parameter with specified number of
449// digits, eg SIZE=... for MAIL FROM.
450func (p *parser) xnumber(maxDigits int, allowZero bool) int64 {
451 s := p.xtakefn1("number", func(c rune, i int) bool {
452 return (c >= '1' && c <= '9' || c == '0' && (i > 0 || allowZero)) && i < maxDigits
453 })
454 v, err := strconv.ParseInt(s, 10, 64)
455 if err != nil {
456 p.xerrorf("bad number %q: %s", s, err)
457 }
458 return v
459}
460
461// parse date-time in UTC form. ../rfc/4865:147 ../rfc/4865-eid2040
462func (p *parser) xdatetimeutc() (time.Time, string) {
463 // ../rfc/3339:422
464 xdash := func() string {
465 p.xtake("-")
466 return "-"
467 }
468 xcolon := func() string {
469 p.xtake(":")
470 return ":"
471 }
472 xdigits := func(n int) string {
473 s := p.xtakefn1("digits", func(c rune, i int) bool {
474 return c >= '0' && c <= '9' && i < n
475 })
476 if len(s) != n {
477 p.xerrorf("parsing date-time: got %d digits, need %d", len(s), n)
478 }
479 return s
480 }
481 s := xdigits(4) + xdash() + xdigits(2) + xdash() + xdigits(2)
482 if !p.hasPrefix("T") {
483 p.xerrorf("expected T for date-time separator")
484 }
485 s += p.xtaken(1) + xdigits(2) + xcolon() + xdigits(2) + xcolon() + xdigits(2)
486 layout := time.RFC3339
487 if p.take(".") {
488 layout = time.RFC3339Nano
489 s += "." + p.xtakefn1("digits", func(c rune, i int) bool {
490 return c >= '0' && c <= '9'
491 })
492 }
493 if !p.hasPrefix("Z") {
494 p.xerrorf("expected Z for date-time utc timezone")
495 }
496 s += p.xtaken(1)
497
498 t, err := time.Parse(layout, s)
499 if err != nil {
500 p.xerrorf("bad utc date-time %q: %s", s, err)
501 }
502 return t, s
503}
504
505// sasl mechanism, for AUTH command.
506// ../rfc/4422:436
507func (p *parser) xsaslMech() string {
508 return p.xtakefn1case("sasl-mech", func(c rune, i int) bool {
509 return i < 20 && (c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '-' || c == '_')
510 })
511}
512
513// ../rfc/4954:696 ../rfc/6533:259
514func (p *parser) xtext() string {
515 r := ""
516 for !p.empty() {
517 b := p.orig[p.o]
518 if b >= 0x21 && b < 0x7f && b != '+' && b != '=' && b != ' ' {
519 r += string(b)
520 p.xtaken(1)
521 continue
522 }
523 if b != '+' {
524 break
525 }
526 p.xtaken(1)
527 x := p.xtaken(2)
528 for _, b := range x {
529 if b >= '0' && b <= '9' || b >= 'A' && b <= 'F' {
530 continue
531 }
532 p.xerrorf("parsing xtext: invalid hexadecimal %q", x)
533 }
534 const hex = "0123456789ABCDEF"
535 b = byte(strings.IndexByte(hex, x[0])<<4) | byte(strings.IndexByte(hex, x[1])<<0)
536 r += string(rune(b))
537 }
538 return r
539}
540