1package smtp
2
3import (
4 "errors"
5 "fmt"
6 "strconv"
7 "strings"
8
9 "golang.org/x/text/unicode/norm"
10
11 "github.com/mjl-/mox/dns"
12)
13
// Pedantic enables stricter parsing. When set, localparts longer than 64 octets
// are rejected while parsing; otherwise localparts of up to 128 octets are
// accepted.
var Pedantic bool
16
// ErrBadAddress is the error wrapped by ParseAddress when its input cannot be
// parsed as an email address.
var ErrBadAddress = errors.New("invalid email address")
18
// Localpart is a decoded local part of an email address, before the "@".
// For quoted strings, values do not hold the surrounding double quotes or the
// escaping backslashes.
// An empty string can be a valid localpart.
// Localparts are in Unicode NFC.
type Localpart string
24
25// String returns a packed representation of an address, with proper escaping/quoting, for use in SMTP.
26func (lp Localpart) String() string {
27 // See ../rfc/5321:2322 ../rfc/6531:414
28 // First we try as dot-string. If not possible we make a quoted-string.
29 dotstr := true
30 t := strings.Split(string(lp), ".")
31 for _, e := range t {
32 for _, c := range e {
33 if c >= '0' && c <= '9' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c > 0x7f {
34 continue
35 }
36 switch c {
37 case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
38 continue
39 }
40 dotstr = false
41 break
42 }
43 dotstr = dotstr && len(e) > 0
44 }
45 dotstr = dotstr && len(t) > 0
46 if dotstr {
47 return string(lp)
48 }
49
50 // Make quoted-string.
51 r := `"`
52 for _, b := range lp {
53 if b == '"' || b == '\\' {
54 r += "\\" + string(b)
55 } else {
56 r += string(b)
57 }
58 }
59 r += `"`
60 return r
61}
62
63// LogString returns the localpart as string for use in smtp, and an escaped
64// representation if it has non-ascii characters.
65func (lp Localpart) LogString() string {
66 s := lp.String()
67 qs := strconv.QuoteToASCII(s)
68 if qs != `"`+s+`"` {
69 s = "/" + qs
70 }
71 return s
72}
73
74// DSNString returns the localpart as string for use in a DSN.
75// utf8 indicates if the remote MTA supports utf8 messaging. If not, the 7bit DSN
76// encoding for "utf-8-addr-xtext" from RFC 6533 is used.
77func (lp Localpart) DSNString(utf8 bool) string {
78 if utf8 {
79 return lp.String()
80 }
81 // ../rfc/6533:259
82 r := ""
83 for _, c := range lp {
84 if c > 0x20 && c < 0x7f && c != '\\' && c != '+' && c != '=' {
85 r += string(c)
86 } else {
87 r += fmt.Sprintf(`\x{%x}`, c)
88 }
89 }
90 return r
91}
92
93// IsInternational returns if this is an internationalized local part, i.e. has
94// non-ASCII characters.
95func (lp Localpart) IsInternational() bool {
96 for _, c := range lp {
97 if c > 0x7f {
98 return true
99 }
100 }
101 return false
102}
103
// Address is a parsed email address.
type Address struct {
	// Localpart is the decoded part before the "@".
	Localpart Localpart
	// Domain is the parsed part after the "@".
	Domain dns.Domain // todo: shouldn't we accept an ip address here too? and merge this type into smtp.Path.
}
109
110// NewAddress returns an address.
111func NewAddress(localpart Localpart, domain dns.Domain) Address {
112 return Address{localpart, domain}
113}
114
115func (a Address) Path() Path {
116 return Path{Localpart: a.Localpart, IPDomain: dns.IPDomain{Domain: a.Domain}}
117}
118
119func (a Address) IsZero() bool {
120 return a == Address{}
121}
122
123// Pack returns the address in string form. If smtputf8 is true, the domain is
124// formatted with non-ASCII characters. If localpart has non-ASCII characters,
125// they are returned regardless of smtputf8.
126func (a Address) Pack(smtputf8 bool) string {
127 if a.IsZero() {
128 return ""
129 }
130 return a.Localpart.String() + "@" + a.Domain.XName(smtputf8)
131}
132
133// String returns the address in string form with non-ASCII characters.
134func (a Address) String() string {
135 if a.IsZero() {
136 return ""
137 }
138 return a.Localpart.String() + "@" + a.Domain.Name()
139}
140
141// LogString returns the address with with utf-8 in localpart and/or domain. In
142// case of an IDNA domain and/or quotable characters in the localpart, an address
143// with quoted/escaped localpart and ASCII domain is also returned.
144func (a Address) LogString() string {
145 if a.IsZero() {
146 return ""
147 }
148 s := a.Pack(true)
149 lp := a.Localpart.String()
150 qlp := strconv.QuoteToASCII(lp)
151 escaped := qlp != `"`+lp+`"`
152 if a.Domain.Unicode != "" || escaped {
153 if escaped {
154 lp = qlp
155 }
156 s += "/" + lp + "@" + a.Domain.ASCII
157 }
158 return s
159}
160
161// ParseAddress parses an email address. UTF-8 is allowed.
162// Returns ErrBadAddress for invalid addresses.
163func ParseAddress(s string) (address Address, err error) {
164 lp, rem, err := parseLocalPart(s)
165 if err != nil {
166 return Address{}, fmt.Errorf("%w: %s", ErrBadAddress, err)
167 }
168 if !strings.HasPrefix(rem, "@") {
169 return Address{}, fmt.Errorf("%w: expected @", ErrBadAddress)
170 }
171 rem = rem[1:]
172 d, err := dns.ParseDomain(rem)
173 if err != nil {
174 return Address{}, fmt.Errorf("%w: %s", ErrBadAddress, err)
175 }
176 return Address{lp, d}, err
177}
178
// ErrBadLocalpart is the error wrapped by ParseLocalpart when its input is not a
// valid localpart.
var ErrBadLocalpart = errors.New("invalid localpart")
180
181// ParseLocalpart parses the local part.
182// UTF-8 is allowed.
183// Returns ErrBadAddress for invalid addresses.
184func ParseLocalpart(s string) (localpart Localpart, err error) {
185 lp, rem, err := parseLocalPart(s)
186 if err != nil {
187 return "", err
188 }
189 if rem != "" {
190 return "", fmt.Errorf("%w: remaining after localpart: %q", ErrBadLocalpart, rem)
191 }
192 return lp, nil
193}
194
195func parseLocalPart(s string) (localpart Localpart, remain string, err error) {
196 p := &parser{s, 0}
197
198 defer func() {
199 x := recover()
200 if x == nil {
201 return
202 }
203 e, ok := x.(error)
204 if !ok {
205 panic(x)
206 }
207 err = fmt.Errorf("%w: %s", ErrBadLocalpart, e)
208 }()
209
210 lp := p.xlocalpart()
211 return lp, p.remainder(), nil
212}
213
// parser holds the string being parsed and the current position in it.
type parser struct {
	s string // Input being parsed.
	o int    // Offset into s of the next byte to parse.
}
218
219func (p *parser) xerrorf(format string, args ...any) {
220 panic(fmt.Errorf(format, args...))
221}
222
223func (p *parser) hasPrefix(s string) bool {
224 return strings.HasPrefix(p.s[p.o:], s)
225}
226
227func (p *parser) take(s string) bool {
228 if p.hasPrefix(s) {
229 p.o += len(s)
230 return true
231 }
232 return false
233}
234
235func (p *parser) xtake(s string) {
236 if !p.take(s) {
237 p.xerrorf("expected %q", s)
238 }
239}
240
241func (p *parser) empty() bool {
242 return p.o == len(p.s)
243}
244
245func (p *parser) xtaken(n int) string {
246 r := p.s[p.o : p.o+n]
247 p.o += n
248 return r
249}
250
251func (p *parser) remainder() string {
252 r := p.s[p.o:]
253 p.o = len(p.s)
254 return r
255}
256
257// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart
258func (p *parser) xlocalpart() Localpart {
259 // ../rfc/5321:2316
260 var s string
261 if p.hasPrefix(`"`) {
262 s = p.xquotedString()
263 } else {
264 s = p.xatom()
265 for p.take(".") {
266 s += "." + p.xatom()
267 }
268 }
269 // In the wild, some services use large localparts for generated (bounce) addresses.
270 if Pedantic && len(s) > 64 || len(s) > 128 {
271 // ../rfc/5321:3486
272 p.xerrorf("localpart longer than 64 octets")
273 }
274 return Localpart(norm.NFC.String(s))
275}
276
277func (p *parser) xquotedString() string {
278 p.xtake(`"`)
279 var s string
280 var esc bool
281 for {
282 c := p.xchar()
283 if esc {
284 if c >= ' ' && c < 0x7f {
285 s += string(c)
286 esc = false
287 continue
288 }
289 p.xerrorf("invalid localpart, bad escaped char %c", c)
290 }
291 if c == '\\' {
292 esc = true
293 continue
294 }
295 if c == '"' {
296 return s
297 }
298 // todo: should we be accepting utf8 for quoted strings?
299 if c >= ' ' && c < 0x7f && c != '\\' && c != '"' || c > 0x7f {
300 s += string(c)
301 continue
302 }
303 p.xerrorf("invalid localpart, invalid character %c", c)
304 }
305}
306
307func (p *parser) xchar() rune {
308 // We are careful to track invalid utf-8 properly.
309 if p.empty() {
310 p.xerrorf("need another character")
311 }
312 var r rune
313 var o int
314 for i, c := range p.s[p.o:] {
315 if i > 0 {
316 o = i
317 break
318 }
319 r = c
320 }
321 if o == 0 {
322 p.o = len(p.s)
323 } else {
324 p.o += o
325 }
326 return r
327}
328
329func (p *parser) takefn1(what string, fn func(c rune, i int) bool) string {
330 if p.empty() {
331 p.xerrorf("need at least one char for %s", what)
332 }
333 for i, c := range p.s[p.o:] {
334 if !fn(c, i) {
335 if i == 0 {
336 p.xerrorf("expected at least one char for %s, got char %c", what, c)
337 }
338 return p.xtaken(i)
339 }
340 }
341 return p.remainder()
342}
343
344func (p *parser) xatom() string {
345 return p.takefn1("atom", func(c rune, i int) bool {
346 switch c {
347 case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
348 return true
349 }
350 return isalphadigit(c) || c > 0x7f
351 })
352}
353
// isalpha returns whether c is an ASCII letter.
func isalpha(c rune) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
357
// isdigit returns whether c is an ASCII digit.
func isdigit(c rune) bool {
	return !(c < '0' || c > '9')
}
361
362func isalphadigit(c rune) bool {
363 return isalpha(c) || isdigit(c)
364}
365