1package smtpserver
2
3import (
4 "errors"
5 "fmt"
6 "net"
7 "strconv"
8 "strings"
9
10 "github.com/mjl-/mox/dns"
11 "github.com/mjl-/mox/mox-"
12 "github.com/mjl-/mox/smtp"
13)
14
// parser holds a line being parsed, both in its original form and with ascii a-z
// upper-cased, for easy case-insensitive parsing. Both strings have the same
// length, so a single offset indexes the same character in either of them.
type parser struct {
	orig              string // Line as passed to newParser.
	upper             string // orig with ascii a-z upper-cased, see toUpper.
	o                 int    // Offset into orig/upper.
	smtputf8          bool   // Whether SMTPUTF8 extension is enabled, making IDNA domains and utf8 localparts valid.
	conn              *conn  // Connection the line came from; its account and submission fields are consulted while parsing.
	utf8LocalpartCode int    // If non-zero, error for utf-8 localpart when smtputf8 not enabled.
}
25
// toUpper returns s with the ascii bytes a-z replaced by A-Z, leaving every other
// byte untouched. strings.ToUpper is not suitable: it does more than we need and
// replaces invalid utf-8 with unicode replacement characters, which would change
// the length of the string. We require that an offset into the original string
// and into the upper-cased string points to the same character.
func toUpper(s string) string {
	buf := []byte(s)
	for i := 0; i < len(buf); i++ {
		if b := buf[i]; 'a' <= b && b <= 'z' {
			buf[i] = b - ('a' - 'A')
		}
	}
	return string(buf)
}
39
40func newParser(s string, smtputf8 bool, conn *conn) *parser {
41 return &parser{orig: s, upper: toUpper(s), smtputf8: smtputf8, conn: conn}
42}
43
44func (p *parser) xerrorf(format string, args ...any) {
45 // For submission, send the remaining unparsed line. Otherwise, only log it.
46 var err error
47 errmsg := "bad syntax: " + fmt.Sprintf(format, args...)
48 remaining := fmt.Sprintf(" (remaining %q)", p.orig[p.o:])
49 if p.conn.account != nil {
50 errmsg += remaining
51 err = errors.New(errmsg)
52 } else {
53 err = errors.New(errmsg + remaining)
54 }
55
56 // ../rfc/5321:2377
57 panic(smtpError{smtp.C501BadParamSyntax, smtp.SeProto5Syntax2, errmsg, err, false, true})
58}
59
60func (p *parser) xutf8localparterrorf() {
61 code := p.utf8LocalpartCode
62 if code == 0 {
63 code = smtp.C550MailboxUnavail
64 }
65 // ../rfc/6531:466
66 xsmtpUserErrorf(code, smtp.SeMsg6NonASCIIAddrNotPermitted7, "non-ascii address not permitted without smtputf8")
67}
68
69func (p *parser) empty() bool {
70 return p.o == len(p.orig)
71}
72
73// note: use xend() for check for end of line with remaining white space, to be used by commands.
74func (p *parser) xempty() {
75 if p.o != len(p.orig) {
76 p.xerrorf("expected end of line")
77 }
78}
79
80// check we are at the end of a command.
81func (p *parser) xend() {
82 // For submission, we are strict.
83 if p.conn.submission {
84 p.xempty()
85 }
86 // Otherwise we allow trailing white space. ../rfc/5321:1758
87 rem := p.remainder()
88 for _, c := range rem {
89 if c != ' ' && c != '\t' {
90 p.xerrorf("trailing data, not white space: %q", rem)
91 }
92 }
93}
94
95func (p *parser) hasPrefix(s string) bool {
96 return strings.HasPrefix(p.upper[p.o:], s)
97}
98
99func (p *parser) take(s string) bool {
100 if p.hasPrefix(s) {
101 p.o += len(s)
102 return true
103 }
104 return false
105}
106
107func (p *parser) xtake(s string) {
108 if !p.take(s) {
109 p.xerrorf("expected %q", s)
110 }
111}
112
113func (p *parser) space() bool {
114 return p.take(" ")
115}
116
117func (p *parser) xspace() {
118 p.xtake(" ")
119}
120
121func (p *parser) xtaken(n int) string {
122 r := p.orig[p.o : p.o+n]
123 p.o += n
124 return r
125}
126
127func (p *parser) remainder() string {
128 r := p.orig[p.o:]
129 p.o = len(p.orig)
130 return r
131}
132
133func (p *parser) peekchar() rune {
134 for _, c := range p.upper[p.o:] {
135 return c
136 }
137 return -1
138}
139
140func (p *parser) takefn1(what string, fn func(c rune, i int) bool) string {
141 if p.empty() {
142 p.xerrorf("need at least one char for %s", what)
143 }
144 for i, c := range p.upper[p.o:] {
145 if !fn(c, i) {
146 if i == 0 {
147 p.xerrorf("expected at least one char for %s", what)
148 }
149 return p.xtaken(i)
150 }
151 }
152 return p.remainder()
153}
154
155func (p *parser) takefn1case(what string, fn func(c rune, i int) bool) string {
156 if p.empty() {
157 p.xerrorf("need at least one char for %s", what)
158 }
159 for i, c := range p.orig[p.o:] {
160 if !fn(c, i) {
161 if i == 0 {
162 p.xerrorf("expected at least one char for %s", what)
163 }
164 return p.xtaken(i)
165 }
166 }
167 return p.remainder()
168}
169
170func (p *parser) takefn(fn func(c rune, i int) bool) string {
171 for i, c := range p.upper[p.o:] {
172 if !fn(c, i) {
173 return p.xtaken(i)
174 }
175 }
176 return p.remainder()
177}
178
179// xrawReversePath returns the raw string between the <>'s. We cannot parse it
180// immediately, because if this is an IDNA (internationalization) address, we would
181// only see the SMTPUTF8 indicator after having parsed the reverse path here. So we
182// parse the raw data here, and validate it after having seen all parameters.
183// ../rfc/5321:2260
184func (p *parser) xrawReversePath() string {
185 p.xtake("<")
186 s := p.takefn(func(c rune, i int) bool {
187 return c != '>'
188 })
189 p.xtake(">")
190 return s
191}
192
193// xbareReversePath parses a reverse-path without <>, as returned by
194// xrawReversePath. It takes smtputf8 into account.
195// ../rfc/5321:2260
196func (p *parser) xbareReversePath() smtp.Path {
197 if p.empty() {
198 return smtp.Path{}
199 }
200 // ../rfc/6531:468
201 p.utf8LocalpartCode = smtp.C550MailboxUnavail
202 defer func() {
203 p.utf8LocalpartCode = 0
204 }()
205 return p.xbarePath()
206}
207
208func (p *parser) xforwardPath() smtp.Path {
209 // ../rfc/6531:466
210 p.utf8LocalpartCode = smtp.C553BadMailbox
211 defer func() {
212 p.utf8LocalpartCode = 0
213 }()
214 return p.xpath()
215}
216
217// ../rfc/5321:2264
218func (p *parser) xpath() smtp.Path {
219 o := p.o
220 p.xtake("<")
221 r := p.xbarePath()
222 p.xtake(">")
223 if p.o-o > 256 {
224 // ../rfc/5321:3495
225 p.xerrorf("path longer than 256 octets")
226 }
227 return r
228}
229
230func (p *parser) xbarePath() smtp.Path {
231 // We parse but ignore any source routing.
232 // ../rfc/5321:1081 ../rfc/5321:1430 ../rfc/5321:1925
233 if p.take("@") {
234 p.xdomain()
235 for p.take(",") {
236 p.xtake("@")
237 p.xdomain()
238 }
239 p.xtake(":")
240 }
241 return p.xmailbox()
242}
243
244// ../rfc/5321:2291
245func (p *parser) xdomain() dns.Domain {
246 s := p.xsubdomain()
247 for p.take(".") {
248 s += "." + p.xsubdomain()
249 }
250 d, err := dns.ParseDomain(s)
251 if err != nil {
252 p.xerrorf("parsing domain name %q: %s", s, err)
253 }
254 if len(s) > 255 {
255 // ../rfc/5321:3491
256 p.xerrorf("domain longer than 255 octets")
257 }
258 return d
259}
260
261// ../rfc/5321:2303
262// ../rfc/5321:2303 ../rfc/6531:411
263func (p *parser) xsubdomain() string {
264 return p.takefn1("subdomain", func(c rune, i int) bool {
265 return c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || i > 0 && c == '-' || c > 0x7f && p.smtputf8
266 })
267}
268
269// ../rfc/5321:2314
270func (p *parser) xmailbox() smtp.Path {
271 localpart := p.xlocalpart()
272 p.xtake("@")
273 return smtp.Path{Localpart: localpart, IPDomain: p.xipdomain(false)}
274}
275
276// ../rfc/5321:2307
277func (p *parser) xldhstr() string {
278 return p.takefn1("ldh-str", func(c rune, i int) bool {
279 return c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || i == 0 && c == '-'
280 })
281}
282
283// parse address-literal or domain.
284func (p *parser) xipdomain(isehlo bool) dns.IPDomain {
285 // ../rfc/5321:2309
286 // ../rfc/5321:2397
287 if p.take("[") {
288 c := p.peekchar()
289 var ipv6 bool
290 if !(c >= '0' && c <= '9') {
291 addrlit := p.xldhstr()
292 p.xtake(":")
293 if !strings.EqualFold(addrlit, "IPv6") {
294 p.xerrorf("unrecognized address literal %q", addrlit)
295 }
296 ipv6 = true
297 }
298 ipaddr := p.takefn1("address literal", func(c rune, i int) bool {
299 return c != ']'
300 })
301 p.take("]")
302 ip := net.ParseIP(ipaddr)
303 if ip == nil {
304 p.xerrorf("invalid ip in address: %q", ipaddr)
305 }
306 isv4 := ip.To4() != nil
307 isAllowedSloppyIPv6Submission := func() bool {
308 // Mail user agents that submit are relatively likely to use IPs in EHLO and forget
309 // that an IPv6 address needs to be tagged as such. We can forgive them. For
310 // SMTP servers we are strict.
311 return isehlo && p.conn.submission && !mox.Pedantic && ip.To16() != nil
312 }
313 if ipv6 && isv4 {
314 p.xerrorf("ip address is not ipv6")
315 } else if !ipv6 && !isv4 && !isAllowedSloppyIPv6Submission() {
316 if ip.To16() != nil {
317 p.xerrorf("ip address is ipv6, must use syntax [IPv6:...]")
318 } else {
319 p.xerrorf("ip address is not ipv4")
320 }
321 }
322 return dns.IPDomain{IP: ip}
323 }
324 return dns.IPDomain{Domain: p.xdomain()}
325}
326
327// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart
328func (p *parser) xlocalpart() smtp.Localpart {
329 // ../rfc/5321:2316
330 var s string
331 if p.hasPrefix(`"`) {
332 s = p.xquotedString(true)
333 } else {
334 s = p.xatom(true)
335 for p.take(".") {
336 s += "." + p.xatom(true)
337 }
338 }
339 // In the wild, some services use large localparts for generated (bounce) addresses.
340 if mox.Pedantic && len(s) > 64 || len(s) > 128 {
341 // ../rfc/5321:3486
342 p.xerrorf("localpart longer than 64 octets")
343 }
344 return smtp.Localpart(s)
345}
346
347// ../rfc/5321:2324
348func (p *parser) xquotedString(islocalpart bool) string {
349 p.xtake(`"`)
350 var s string
351 var esc bool
352 for {
353 c := p.xchar()
354 if esc {
355 if c >= ' ' && c < 0x7f {
356 s += string(c)
357 esc = false
358 continue
359 }
360 p.xerrorf("invalid localpart, bad escaped char %c", c)
361 }
362 if c == '\\' {
363 esc = true
364 continue
365 }
366 if c == '"' {
367 return s
368 }
369 // ../rfc/5321:2332 ../rfc/6531:419
370 if islocalpart && c > 0x7f && !p.smtputf8 {
371 p.xutf8localparterrorf()
372 }
373 if c >= ' ' && c < 0x7f && c != '\\' && c != '"' || (c > 0x7f && p.smtputf8) {
374 s += string(c)
375 continue
376 }
377 p.xerrorf("invalid localpart, invalid character %c", c)
378 }
379}
380
381func (p *parser) xchar() rune {
382 // We are careful to track invalid utf-8 properly.
383 if p.empty() {
384 p.xerrorf("need another character")
385 }
386 var r rune
387 var o int
388 for i, c := range p.orig[p.o:] {
389 if i > 0 {
390 o = i
391 break
392 }
393 r = c
394 }
395 if o == 0 {
396 p.o = len(p.orig)
397 } else {
398 p.o += o
399 }
400 return r
401}
402
403// ../rfc/5321:2320 ../rfc/6531:414
404func (p *parser) xatom(islocalpart bool) string {
405 return p.takefn1("atom", func(c rune, i int) bool {
406 switch c {
407 case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
408 return true
409 }
410 if islocalpart && c > 0x7f && !p.smtputf8 {
411 p.xutf8localparterrorf()
412 }
413 return c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || (c > 0x7f && p.smtputf8)
414 })
415}
416
417// ../rfc/5321:2338
418func (p *parser) xstring() string {
419 if p.peekchar() == '"' {
420 return p.xquotedString(false)
421 }
422 return p.xatom(false)
423}
424
425// ../rfc/5321:2279
426func (p *parser) xparamKeyword() string {
427 return p.takefn1("parameter keyword", func(c rune, i int) bool {
428 return c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || (i > 0 && c == '-')
429 })
430}
431
432// ../rfc/5321:2281 ../rfc/6531:422
433func (p *parser) xparamValue() string {
434 return p.takefn1("parameter value", func(c rune, i int) bool {
435 return c > ' ' && c < 0x7f && c != '=' || (c > 0x7f && p.smtputf8)
436 })
437}
438
439// for smtp parameters that take a numeric parameter with specified number of
440// digits, eg SIZE=... for MAIL FROM.
441func (p *parser) xnumber(maxDigits int) int64 {
442 s := p.takefn1("number", func(c rune, i int) bool {
443 return c >= '0' && c <= '9' && i < maxDigits
444 })
445 v, err := strconv.ParseInt(s, 10, 64)
446 if err != nil {
447 p.xerrorf("bad number %q: %s", s, err)
448 }
449 return v
450}
451
452// sasl mechanism, for AUTH command.
453// ../rfc/4422:436
454func (p *parser) xsaslMech() string {
455 return p.takefn1case("sasl-mech", func(c rune, i int) bool {
456 return i < 20 && (c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '-' || c == '_')
457 })
458}
459
460// ../rfc/4954:696 ../rfc/6533:259
461func (p *parser) xtext() string {
462 r := ""
463 for !p.empty() {
464 b := p.orig[p.o]
465 if b >= 0x21 && b < 0x7f && b != '+' && b != '=' && b != ' ' {
466 r += string(b)
467 p.xtaken(1)
468 continue
469 }
470 if b != '+' {
471 break
472 }
473 p.xtaken(1)
474 x := p.xtaken(2)
475 for _, b := range x {
476 if b >= '0' && b <= '9' || b >= 'A' && b <= 'F' {
477 continue
478 }
479 p.xerrorf("parsing xtext: invalid hexadecimal %q", x)
480 }
481 const hex = "0123456789ABCDEF"
482 b = byte(strings.IndexByte(hex, x[0])<<4) | byte(strings.IndexByte(hex, x[1])<<0)
483 r += string(rune(b))
484 }
485 return r
486}
487