1package smtp
2
3import (
4 "errors"
5 "fmt"
6 "strconv"
7 "strings"
8
9 "golang.org/x/text/unicode/norm"
10
11 "github.com/mjl-/mox/dns"
12)
13
// Pedantic enables stricter parsing. When set, localparts longer than 64 octets
// are rejected during parsing; when unset, localparts of up to 128 octets are
// accepted.
var Pedantic bool
16
// ErrBadAddress is the error wrapped by ParseAddress and ParseNetMailAddress when
// an address cannot be parsed.
var ErrBadAddress = errors.New("invalid email address")
18
// Localpart is a decoded local part of an email address, i.e. the part before
// the "@".
// For quoted strings, values do not hold the surrounding double quotes or the
// escaping backslashes. Use String to get the quoted/escaped form.
// An empty string can be a valid localpart.
// Localparts are in Unicode NFC.
type Localpart string
24
25// String returns a packed representation of an address, with proper escaping/quoting, for use in SMTP.
26func (lp Localpart) String() string {
27 // See ../rfc/5321:2322 ../rfc/6531:414
28 // First we try as dot-string. If not possible we make a quoted-string.
29 dotstr := true
30 t := strings.Split(string(lp), ".")
31 for _, e := range t {
32 for _, c := range e {
33 if c >= '0' && c <= '9' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c > 0x7f {
34 continue
35 }
36 switch c {
37 case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
38 continue
39 }
40 dotstr = false
41 break
42 }
43 dotstr = dotstr && len(e) > 0
44 }
45 dotstr = dotstr && len(t) > 0
46 if dotstr {
47 return string(lp)
48 }
49
50 // Make quoted-string.
51 r := `"`
52 for _, b := range lp {
53 if b == '"' || b == '\\' {
54 r += "\\" + string(b)
55 } else {
56 r += string(b)
57 }
58 }
59 r += `"`
60 return r
61}
62
63// LogString returns the localpart as string for use in smtp, and an escaped
64// representation if it has non-ascii characters.
65func (lp Localpart) LogString() string {
66 s := lp.String()
67 qs := strconv.QuoteToASCII(s)
68 if qs != `"`+s+`"` {
69 s = "/" + qs
70 }
71 return s
72}
73
74// DSNString returns the localpart as string for use in a DSN.
75// utf8 indicates if the remote MTA supports utf8 messaging. If not, the 7bit DSN
76// encoding for "utf-8-addr-xtext" from RFC 6533 is used.
77func (lp Localpart) DSNString(utf8 bool) string {
78 if utf8 {
79 return lp.String()
80 }
81 // ../rfc/6533:259
82 r := ""
83 for _, c := range lp {
84 if c > 0x20 && c < 0x7f && c != '\\' && c != '+' && c != '=' {
85 r += string(c)
86 } else {
87 r += fmt.Sprintf(`\x{%x}`, c)
88 }
89 }
90 return r
91}
92
93// IsInternational returns if this is an internationalized local part, i.e. has
94// non-ASCII characters.
95func (lp Localpart) IsInternational() bool {
96 for _, c := range lp {
97 if c > 0x7f {
98 return true
99 }
100 }
101 return false
102}
103
// Address is a parsed email address, consisting of a localpart and a domain.
// The zero value is treated as "no address": Pack, String and LogString return
// an empty string for it.
type Address struct {
	Localpart Localpart  // Decoded part before the "@".
	Domain    dns.Domain // todo: shouldn't we accept an ip address here too? and merge this type into smtp.Path.
}
109
110// NewAddress returns an address.
111func NewAddress(localpart Localpart, domain dns.Domain) Address {
112 return Address{localpart, domain}
113}
114
115func (a Address) Path() Path {
116 return Path{Localpart: a.Localpart, IPDomain: dns.IPDomain{Domain: a.Domain}}
117}
118
119func (a Address) IsZero() bool {
120 return a == Address{}
121}
122
123// Pack returns the address in string form. If smtputf8 is true, the domain is
124// formatted with non-ASCII characters. If localpart has non-ASCII characters,
125// they are returned regardless of smtputf8.
126func (a Address) Pack(smtputf8 bool) string {
127 if a.IsZero() {
128 return ""
129 }
130 return a.Localpart.String() + "@" + a.Domain.XName(smtputf8)
131}
132
133// String returns the address in string form with non-ASCII characters.
134func (a Address) String() string {
135 if a.IsZero() {
136 return ""
137 }
138 return a.Localpart.String() + "@" + a.Domain.Name()
139}
140
141// LogString returns the address with with utf-8 in localpart and/or domain. In
142// case of an IDNA domain and/or quotable characters in the localpart, an address
143// with quoted/escaped localpart and ASCII domain is also returned.
144func (a Address) LogString() string {
145 if a.IsZero() {
146 return ""
147 }
148 s := a.Pack(true)
149 lp := a.Localpart.String()
150 qlp := strconv.QuoteToASCII(lp)
151 escaped := qlp != `"`+lp+`"`
152 if a.Domain.Unicode != "" || escaped {
153 if escaped {
154 lp = qlp
155 }
156 s += "/" + lp + "@" + a.Domain.ASCII
157 }
158 return s
159}
160
161// ParseAddress parses an email address. UTF-8 is allowed.
162// Returns ErrBadAddress for invalid addresses.
163func ParseAddress(s string) (address Address, err error) {
164 lp, rem, err := parseLocalPart(s)
165 if err != nil {
166 return Address{}, fmt.Errorf("%w: %s", ErrBadAddress, err)
167 }
168 if !strings.HasPrefix(rem, "@") {
169 return Address{}, fmt.Errorf("%w: expected @", ErrBadAddress)
170 }
171 rem = rem[1:]
172 d, err := dns.ParseDomain(rem)
173 if err != nil {
174 return Address{}, fmt.Errorf("%w: %s", ErrBadAddress, err)
175 }
176 return Address{lp, d}, err
177}
178
179// ParseNetMailAddress parses a not-quite-valid address as found in
180// net/mail.Address.Address.
181//
182// net/mail does parse quoted addresses properly, but stores the localpart
183// unquoted. So an address `" "@example.com` would be stored as ` @example.com`,
184// which we would fail to parse without special attention.
185func ParseNetMailAddress(a string) (address Address, err error) {
186 i := strings.LastIndex(a, "@")
187 if i < 0 {
188 return Address{}, fmt.Errorf("%w: missing @", ErrBadAddress)
189 }
190 addrStr := Localpart(a[:i]).String() + "@" + a[i+1:]
191 return ParseAddress(addrStr)
192}
193
// ErrBadLocalpart is the error wrapped by ParseLocalpart when a localpart cannot
// be parsed.
var ErrBadLocalpart = errors.New("invalid localpart")
195
196// ParseLocalpart parses the local part.
197// UTF-8 is allowed.
198// Returns ErrBadAddress for invalid addresses.
199func ParseLocalpart(s string) (localpart Localpart, err error) {
200 lp, rem, err := parseLocalPart(s)
201 if err != nil {
202 return "", err
203 }
204 if rem != "" {
205 return "", fmt.Errorf("%w: remaining after localpart: %q", ErrBadLocalpart, rem)
206 }
207 return lp, nil
208}
209
210func parseLocalPart(s string) (localpart Localpart, remain string, err error) {
211 p := &parser{s, 0}
212
213 defer func() {
214 x := recover()
215 if x == nil {
216 return
217 }
218 e, ok := x.(error)
219 if !ok {
220 panic(x)
221 }
222 err = fmt.Errorf("%w: %s", ErrBadLocalpart, e)
223 }()
224
225 lp := p.xlocalpart()
226 return lp, p.remainder(), nil
227}
228
// parser holds the state for parsing a localpart from a string. Methods with
// names starting with "x" report errors by panicking through xerrorf.
type parser struct {
	s string // Input being parsed.
	o int    // Offset into s of the next byte to parse.
}
233
234func (p *parser) xerrorf(format string, args ...any) {
235 panic(fmt.Errorf(format, args...))
236}
237
238func (p *parser) hasPrefix(s string) bool {
239 return strings.HasPrefix(p.s[p.o:], s)
240}
241
242func (p *parser) take(s string) bool {
243 if p.hasPrefix(s) {
244 p.o += len(s)
245 return true
246 }
247 return false
248}
249
250func (p *parser) xtake(s string) {
251 if !p.take(s) {
252 p.xerrorf("expected %q", s)
253 }
254}
255
256func (p *parser) empty() bool {
257 return p.o == len(p.s)
258}
259
260func (p *parser) xtaken(n int) string {
261 r := p.s[p.o : p.o+n]
262 p.o += n
263 return r
264}
265
266func (p *parser) remainder() string {
267 r := p.s[p.o:]
268 p.o = len(p.s)
269 return r
270}
271
272// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart
273func (p *parser) xlocalpart() Localpart {
274 // ../rfc/5321:2316
275 var s string
276 if p.hasPrefix(`"`) {
277 s = p.xquotedString()
278 } else {
279 s = p.xatom()
280 for p.take(".") {
281 s += "." + p.xatom()
282 }
283 }
284 // In the wild, some services use large localparts for generated (bounce) addresses.
285 if Pedantic && len(s) > 64 || len(s) > 128 {
286 // ../rfc/5321:3486
287 p.xerrorf("localpart longer than 64 octets")
288 }
289 return Localpart(norm.NFC.String(s))
290}
291
292func (p *parser) xquotedString() string {
293 p.xtake(`"`)
294 var s string
295 var esc bool
296 for {
297 c := p.xchar()
298 if esc {
299 if c >= ' ' && c < 0x7f {
300 s += string(c)
301 esc = false
302 continue
303 }
304 p.xerrorf("invalid localpart, bad escaped char %c", c)
305 }
306 if c == '\\' {
307 esc = true
308 continue
309 }
310 if c == '"' {
311 return s
312 }
313 // todo: should we be accepting utf8 for quoted strings?
314 if c >= ' ' && c < 0x7f && c != '\\' && c != '"' || c > 0x7f {
315 s += string(c)
316 continue
317 }
318 p.xerrorf("invalid localpart, invalid character %c", c)
319 }
320}
321
322func (p *parser) xchar() rune {
323 // We are careful to track invalid utf-8 properly.
324 if p.empty() {
325 p.xerrorf("need another character")
326 }
327 var r rune
328 var o int
329 for i, c := range p.s[p.o:] {
330 if i > 0 {
331 o = i
332 break
333 }
334 r = c
335 }
336 if o == 0 {
337 p.o = len(p.s)
338 } else {
339 p.o += o
340 }
341 return r
342}
343
344func (p *parser) takefn1(what string, fn func(c rune, i int) bool) string {
345 if p.empty() {
346 p.xerrorf("need at least one char for %s", what)
347 }
348 for i, c := range p.s[p.o:] {
349 if !fn(c, i) {
350 if i == 0 {
351 p.xerrorf("expected at least one char for %s, got char %c", what, c)
352 }
353 return p.xtaken(i)
354 }
355 }
356 return p.remainder()
357}
358
359func (p *parser) xatom() string {
360 return p.takefn1("atom", func(c rune, i int) bool {
361 switch c {
362 case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
363 return true
364 }
365 return isalphadigit(c) || c > 0x7f
366 })
367}
368
// isalpha returns whether c is an ASCII letter.
func isalpha(c rune) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
372
// isdigit returns whether c is an ASCII digit.
func isdigit(c rune) bool {
	return '0' <= c && c <= '9'
}
376
377func isalphadigit(c rune) bool {
378 return isalpha(c) || isdigit(c)
379}
380