10 "github.com/mjl-/mox/dns"
11 "github.com/mjl-/mox/mox-"
12 "github.com/mjl-/mox/smtp"
15// Parser holds the original string and string with ascii a-z upper-cased for easy
16// case-insensitive parsing.
20 o int // Offset into orig/upper.
21 smtputf8 bool // Whether SMTPUTF8 extension is enabled, making IDNA domains and utf8 localparts valid.
23 utf8LocalpartCode int // If non-zero, error for utf-8 localpart when smtputf8 not enabled.
26// toUpper upper-cases the ASCII bytes a-z and leaves every other byte alone.
27// strings.ToUpper does too much: it would replace invalid bytes with unicode
28// replacement characters, which would break our requirement that offsets into
29// the original and upper-case strings point to the same character.
30func toUpper(s string) string {
33 if c >= 'a' && c <= 'z' {
// newParser returns a parser for s. It stores s unchanged in orig, its
// toUpper form in upper, and records the smtputf8 setting and the connection.
40func newParser(s string, smtputf8 bool, conn *conn) *parser {
41 return &parser{orig: s, upper: toUpper(s), smtputf8: smtputf8, conn: conn}
// xerrorf reports a syntax error at the current parse position. It formats a
// "bad syntax: ..." message from format/args and ends in a panic with an
// smtpError carrying smtp.C501BadParamSyntax and smtp.SeProto5Syntax2.
//
// The two branches on p.conn.account differ in how the unparsed remainder ends
// up in err versus errmsg; see the comment on the first body line for the
// intent.
44func (p *parser) xerrorf(format string, args ...any) {
45 // For submission, send the remaining unparsed line. Otherwise, only log it.
47 errmsg := "bad syntax: " + fmt.Sprintf(format, args...)
 // Quoted copy of the input that has not been consumed yet.
48 remaining := fmt.Sprintf(" (remaining %q)", p.orig[p.o:])
49 if p.conn.account != nil {
51 err = errors.New(errmsg)
53 err = errors.New(errmsg + remaining)
57 panic(smtpError{smtp.C501BadParamSyntax, smtp.SeProto5Syntax2, errmsg, err, false, true})
// xutf8localparterrorf reports a non-ASCII localpart seen while smtputf8 is not
// enabled, through xsmtpUserErrorf with enhanced code
// smtp.SeMsg6NonASCIIAddrNotPermitted7. The SMTP code starts out as
// p.utf8LocalpartCode.
// NOTE(review): the condition guarding the smtp.C550MailboxUnavail assignment
// is not visible here; presumably it is the fallback when the code is zero.
60func (p *parser) xutf8localparterrorf() {
61 code := p.utf8LocalpartCode
63 code = smtp.C550MailboxUnavail
66 xsmtpUserErrorf(code, smtp.SeMsg6NonASCIIAddrNotPermitted7, "non-ascii address not permitted without smtputf8")
// empty reports whether the parse offset has reached the end of the input.
69func (p *parser) empty() bool {
70 return p.o == len(p.orig)
// xempty raises a syntax error through xerrorf unless all input has been
// consumed.
73// Note: commands should use xend() to check for end of line, since it deals with remaining white space.
74func (p *parser) xempty() {
75 if p.o != len(p.orig) {
76 p.xerrorf("expected end of line")
80// xend checks that we are at the end of a command.
// Submission connections get a strict check (that branch's body is not
// visible here). The loop over rem raises a syntax error for any character
// that is not a space or a tab.
81func (p *parser) xend() {
82 // For submission, we are strict.
83 if p.conn.submission {
88 for _, c := range rem {
89 if c != ' ' && c != '\t' {
90 p.xerrorf("trailing data, not white space: %q", rem)
// hasPrefix reports whether the unparsed input starts with s. The comparison
// is made against the upper-cased copy of the input, so letters in s must be
// given in upper case to match. Nothing is consumed.
95func (p *parser) hasPrefix(s string) bool {
96 return strings.HasPrefix(p.upper[p.o:], s)
99func (p *parser) take(s string) bool {
// xtake raises the syntax error `expected "<s>"` through xerrorf.
// NOTE(review): the guarding condition is not visible here; presumably the
// error is raised only when take(s) fails. Confirm.
107func (p *parser) xtake(s string) {
109 p.xerrorf("expected %q", s)
113func (p *parser) space() bool {
117func (p *parser) xspace() {
// xtaken reads n bytes from the original (not upper-cased) input, starting at
// the current offset. Slicing panics if fewer than n bytes remain.
// NOTE(review): advancing the offset and returning r happen on lines not
// visible here; confirm.
121func (p *parser) xtaken(n int) string {
122 r := p.orig[p.o : p.o+n]
127func (p *parser) remainder() string {
133func (p *parser) peekchar() rune {
134 for _, c := range p.upper[p.o:] {
// takefn1 walks the upper-cased input rune by rune from the current offset and
// requires at least one character; what names the element being parsed in the
// error messages.
// NOTE(review): the lines that call fn and return the result are not visible
// here. Presumably fn receives each (upper-cased) rune with its byte index
// within the remaining input, and scanning stops at the first rejected rune.
140func (p *parser) takefn1(what string, fn func(c rune, i int) bool) string {
142 p.xerrorf("need at least one char for %s", what)
144 for i, c := range p.upper[p.o:] {
147 p.xerrorf("expected at least one char for %s", what)
// takefn1case is the counterpart of takefn1 that walks the original input
// instead of the upper-cased copy, so the runes it iterates over keep their
// original case. It emits the same "at least one char" errors, labelled with
// what.
// NOTE(review): the call to fn and the return are on lines not visible here.
155func (p *parser) takefn1case(what string, fn func(c rune, i int) bool) string {
157 p.xerrorf("need at least one char for %s", what)
159 for i, c := range p.orig[p.o:] {
162 p.xerrorf("expected at least one char for %s", what)
// takefn walks the upper-cased input rune by rune from the current offset.
// NOTE(review): unlike takefn1 it takes no label for error messages and no
// error call is visible, so presumably an empty match is allowed. Confirm
// against the full body.
170func (p *parser) takefn(fn func(c rune, i int) bool) string {
171 for i, c := range p.upper[p.o:] {
179// xrawReversePath returns the raw string between the <>'s. We cannot parse it
180// immediately, because if this is an IDNA (internationalization) address, we would
181// only see the SMTPUTF8 indicator after having parsed the reverse path here. So we
182// parse the raw data here, and validate it after having seen all parameters.
184func (p *parser) xrawReversePath() string {
186 s := p.takefn(func(c rune, i int) bool {
193// xbareReversePath parses a reverse-path without <>, as returned by
194// xrawReversePath. It takes smtputf8 into account.
//
// utf8LocalpartCode is set to smtp.C550MailboxUnavail, the code
// xutf8localparterrorf reads when it reports a non-ASCII localpart, and is
// reset to 0 again.
// NOTE(review): whether the reset is deferred is not visible here.
196func (p *parser) xbareReversePath() smtp.Path {
201 p.utf8LocalpartCode = smtp.C550MailboxUnavail
203 p.utf8LocalpartCode = 0
// xforwardPath parses a forward-path into an smtp.Path.
//
// utf8LocalpartCode is set to smtp.C553BadMailbox, the code
// xutf8localparterrorf reads when it reports a non-ASCII localpart, and is
// reset to 0 again.
// NOTE(review): the actual path parsing call and whether the reset is deferred
// are on lines not visible here.
208func (p *parser) xforwardPath() smtp.Path {
210 p.utf8LocalpartCode = smtp.C553BadMailbox
212 p.utf8LocalpartCode = 0
// xpath parses a path into an smtp.Path and raises a syntax error for a path
// longer than 256 octets.
// NOTE(review): the parsing steps and the exact length condition are on lines
// not visible here; the limit is taken from the error message.
218func (p *parser) xpath() smtp.Path {
225 p.xerrorf("path longer than 256 octets")
230func (p *parser) xbarePath() smtp.Path {
231 // We parse but ignore any source routing.
// xdomain parses a domain name: subdomains (see xsubdomain) joined with "."
// are collected into a string that is then validated with dns.ParseDomain.
// A syntax error is raised if dns.ParseDomain fails, and for a domain longer
// than 255 octets.
// NOTE(review): the first xsubdomain call, the loop condition and the length
// check itself are on lines not visible here.
245func (p *parser) xdomain() dns.Domain {
248 s += "." + p.xsubdomain()
250 d, err := dns.ParseDomain(s)
252 p.xerrorf("parsing domain name %q: %s", s, err)
256 p.xerrorf("domain longer than 255 octets")
// xsubdomain takes one label of a domain name, at least one character long.
// Accepted are digits, A-Z, '-' anywhere but the first position, and non-ASCII
// runes (> 0x7f) only when smtputf8 is enabled. takefn1 scans the upper-cased
// input, so the A-Z test presumably covers lower-case letters too.
263func (p *parser) xsubdomain() string {
264 return p.takefn1("subdomain", func(c rune, i int) bool {
265 return c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || i > 0 && c == '-' || c > 0x7f && p.smtputf8
// xmailbox parses a mailbox into an smtp.Path: a localpart (xlocalpart)
// followed by an address literal or domain (xipdomain with isehlo=false).
// NOTE(review): whatever is consumed between the two parts (presumably "@")
// is on a line not visible here.
270func (p *parser) xmailbox() smtp.Path {
271 localpart := p.xlocalpart()
273 return smtp.Path{Localpart: localpart, IPDomain: p.xipdomain(false)}
// xldhstr takes a letter-digit-hyphen string of at least one character. Letters
// are tested as A-Z; takefn1 scans the upper-cased input.
// NOTE(review): as written, '-' is accepted only at index 0 (i == 0). xsubdomain
// uses i > 0 for its hyphen rule, and the RFC 5321 Ldh-str grammar allows '-'
// in every position except the last. Confirm whether i > 0 was intended.
277func (p *parser) xldhstr() string {
278 return p.takefn1("ldh-str", func(c rune, i int) bool {
279 return c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || i == 0 && c == '-'
283// xipdomain parses an address-literal or a domain.
// The result has IP set for an address literal, and Domain (parsed by xdomain)
// otherwise. isehlo is only consulted by isAllowedSloppyIPv6Submission below.
// Syntax problems are raised through xerrorf.
// NOTE(review): the test that selects the address-literal branch, the
// assignment of ipv6, and the consumption of delimiters are on lines not
// visible here.
284func (p *parser) xipdomain(isehlo bool) dns.IPDomain {
 // A literal that does not start with a digit carries a tag in front of the
 // address; "IPv6" (compared case-insensitively) is the only tag recognized.
290 if !(c >= '0' && c <= '9') {
291 addrlit := p.xldhstr()
293 if !strings.EqualFold(addrlit, "IPv6") {
294 p.xerrorf("unrecognized address literal %q", addrlit)
298 ipaddr := p.takefn1("address literal", func(c rune, i int) bool {
 // net.ParseIP returns nil when the text is not a valid IP address.
302 ip := net.ParseIP(ipaddr)
304 p.xerrorf("invalid ip in address: %q", ipaddr)
306 isv4 := ip.To4() != nil
307 isAllowedSloppyIPv6Submission := func() bool {
308 // Mail user agents that submit are relatively likely to use IPs in EHLO and forget
309 // that an IPv6 address needs to be tagged as such. We can forgive them. For
310 // SMTP servers we are strict.
311 return isehlo && p.conn.submission && !mox.Pedantic && ip.To16() != nil
314 p.xerrorf("ip address is not ipv6")
 // An untagged literal must be IPv4, unless the leniency above applies.
315 } else if !ipv6 && !isv4 && !isAllowedSloppyIPv6Submission() {
316 if ip.To16() != nil {
317 p.xerrorf("ip address is ipv6, must use syntax [IPv6:...]")
319 p.xerrorf("ip address is not ipv4")
322 return dns.IPDomain{IP: ip}
324 return dns.IPDomain{Domain: p.xdomain()}
// xlocalpart parses the localpart of an address: a quoted string when the
// input starts with a double quote, otherwise atoms joined with ".". Both
// helpers are called with islocalpart=true.
//
// The length limit is 64 octets when mox.Pedantic is set and 128 otherwise.
// NOTE(review): the error message says "64 octets" in both cases, so it is
// inaccurate when the 128 limit was the one exceeded.
327// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart
328func (p *parser) xlocalpart() smtp.Localpart {
331 if p.hasPrefix(`"`) {
332 s = p.xquotedString(true)
336 s += "." + p.xatom(true)
339 // In the wild, some services use large localparts for generated (bounce) addresses.
340 if mox.Pedantic && len(s) > 64 || len(s) > 128 {
342 p.xerrorf("localpart longer than 64 octets")
344 return smtp.Localpart(s)
// xquotedString parses a quoted string. From the visible checks:
//   - one test accepts only printable ASCII (' ' up to, not including, 0x7f);
//     the "bad escaped char" error suggests it applies to the character after
//     a backslash;
//   - with islocalpart set, a non-ASCII rune while smtputf8 is disabled is
//     reported through xutf8localparterrorf;
//   - unescaped characters may be printable ASCII other than '\\' and '"', or
//     non-ASCII when smtputf8 is enabled.
//
// NOTE(review): the loop structure and handling of the surrounding quotes are
// on lines not visible here. The error texts say "invalid localpart" even
// though xstring calls this with islocalpart=false.
348func (p *parser) xquotedString(islocalpart bool) string {
355 if c >= ' ' && c < 0x7f {
360 p.xerrorf("invalid localpart, bad escaped char %c", c)
370 if islocalpart && c > 0x7f && !p.smtputf8 {
371 p.xutf8localparterrorf()
373 if c >= ' ' && c < 0x7f && c != '\\' && c != '"' || (c > 0x7f && p.smtputf8) {
377 p.xerrorf("invalid localpart, invalid character %c", c)
381func (p *parser) xchar() rune {
382 // We are careful to track invalid utf-8 properly.
384 p.xerrorf("need another character")
388 for i, c := range p.orig[p.o:] {
// xatom takes an atom of at least one character. The visible tests cover the
// listed punctuation characters, digits, A-Z (takefn1 scans the upper-cased
// input) and, when smtputf8 is enabled, non-ASCII runes. With islocalpart set,
// a non-ASCII rune while smtputf8 is disabled is reported through
// xutf8localparterrorf.
// NOTE(review): what the punctuation case does (presumably return true) is on
// a line not visible here.
404func (p *parser) xatom(islocalpart bool) string {
405 return p.takefn1("atom", func(c rune, i int) bool {
407 case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
410 if islocalpart && c > 0x7f && !p.smtputf8 {
411 p.xutf8localparterrorf()
413 return c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || (c > 0x7f && p.smtputf8)
// xstring parses a string that is not a localpart: a quoted string when the
// next character is a double quote, otherwise an atom. Both helpers are called
// with islocalpart=false.
418func (p *parser) xstring() string {
419 if p.peekchar() == '"' {
420 return p.xquotedString(false)
422 return p.xatom(false)
// xparamKeyword takes a parameter keyword of at least one character: digits,
// A-Z (takefn1 scans the upper-cased input), and '-' anywhere but the first
// position.
426func (p *parser) xparamKeyword() string {
427 return p.takefn1("parameter keyword", func(c rune, i int) bool {
428 return c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || (i > 0 && c == '-')
// xparamValue takes a parameter value of at least one character: ASCII
// characters above ' ' and below 0x7f other than '=', plus non-ASCII runes
// when smtputf8 is enabled.
433func (p *parser) xparamValue() string {
434 return p.takefn1("parameter value", func(c rune, i int) bool {
435 return c > ' ' && c < 0x7f && c != '=' || (c > 0x7f && p.smtputf8)
439// xnumber is for smtp parameters that take a numeric parameter with specified number of
440// digits, eg SIZE=... for MAIL FROM.
// It takes at least one and at most maxDigits decimal digits (i < maxDigits)
// and converts them with strconv.ParseInt (base 10, 64 bits). A conversion
// failure is reported through xerrorf as "bad number".
441func (p *parser) xnumber(maxDigits int) int64 {
442 s := p.takefn1("number", func(c rune, i int) bool {
443 return c >= '0' && c <= '9' && i < maxDigits
445 v, err := strconv.ParseInt(s, 10, 64)
447 p.xerrorf("bad number %q: %s", s, err)
452// xsaslMech parses a sasl mechanism name, for the AUTH command.
// It takes at least one and at most 20 characters (i < 20) out of A-Z, digits,
// '-' and '_'. It uses takefn1case, which scans the original input rather than
// the upper-cased copy, so presumably lower-case letters are not accepted.
454func (p *parser) xsaslMech() string {
455 return p.takefn1case("sasl-mech", func(c rune, i int) bool {
456 return i < 20 && (c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '-' || c == '_')
461func (p *parser) xtext() string {
465 if b >= 0x21 && b < 0x7f && b != '+' && b != '=' && b != ' ' {
475 for _, b := range x {
476 if b >= '0' && b <= '9' || b >= 'A' && b <= 'F' {
479 p.xerrorf("parsing xtext: invalid hexadecimal %q", x)
481 const hex = "0123456789ABCDEF"
482 b = byte(strings.IndexByte(hex, x[0])<<4) | byte(strings.IndexByte(hex, x[1])<<0)