1package smtp
2
3import (
4 "errors"
5 "fmt"
6 "strconv"
7 "strings"
8
9 "github.com/mjl-/mox/dns"
10)
11
// Pedantic enables stricter parsing.
// When set, the localpart parser in this file rejects localparts longer than
// 64 octets; otherwise localparts of up to 128 octets are accepted.
var Pedantic bool

// ErrBadAddress is the sentinel error wrapped by every error ParseAddress
// returns; match it with errors.Is.
var ErrBadAddress = errors.New("invalid email address")
16
// Localpart is a decoded local part of an email address, before the "@".
// For quoted strings, values do not hold the double quote or escaping backslashes.
// An empty string can be a valid localpart.
type Localpart string

// String returns a packed representation of the localpart, with proper
// escaping/quoting, for use in SMTP.
//
// The value is returned as-is when it is a valid dot-string (non-empty atoms
// separated by single dots). Otherwise it is wrapped in double quotes, with
// any double quote or backslash escaped by a backslash.
func (lp Localpart) String() string {
	// See ../rfc/5321:2322 ../rfc/6531:414
	// First we try as dot-string. If not possible we make a quoted-string.
	dotstr := true
Elements:
	for _, e := range strings.Split(string(lp), ".") {
		// An empty element means an empty localpart, a leading/trailing dot or two
		// consecutive dots, none of which a dot-string can represent.
		if e == "" {
			dotstr = false
			break
		}
		for _, c := range e {
			if c >= '0' && c <= '9' || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c > 0x7f {
				continue
			}
			switch c {
			case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
				continue
			}
			dotstr = false
			break Elements
		}
	}
	if dotstr {
		return string(lp)
	}

	// Make quoted-string.
	var b strings.Builder
	b.Grow(len(lp) + 2)
	b.WriteByte('"')
	for _, c := range lp {
		if c == '"' || c == '\\' {
			b.WriteByte('\\')
		}
		b.WriteRune(c)
	}
	b.WriteByte('"')
	return b.String()
}

// LogString returns the localpart as string for use in smtp, and an escaped
// representation if it has non-ascii characters.
//
// When escaping changes anything (non-ASCII characters, or a localpart that
// needed quoting), the result is the SMTP form, a "/", and the ASCII-only
// quoted form. Otherwise only the SMTP form is returned.
func (lp Localpart) LogString() string {
	s := lp.String()
	qs := strconv.QuoteToASCII(s)
	// QuoteToASCII always adds surrounding quotes; any other difference means
	// something was escaped.
	if qs != `"`+s+`"` {
		// Append rather than replace, so the SMTP form stays in the log line.
		s += "/" + qs
	}
	return s
}

// DSNString returns the localpart as string for use in a DSN.
// utf8 indicates if the remote MTA supports utf8 messaging. If not, the 7bit DSN
// encoding for "utf-8-addr-xtext" from RFC 6533 is used.
func (lp Localpart) DSNString(utf8 bool) string {
	if utf8 {
		return lp.String()
	}
	// ../rfc/6533:259
	var b strings.Builder
	for _, c := range lp {
		// Printable ASCII other than "\", "+" and "=" is passed through; everything
		// else is written as \x{HEX} of the code point.
		if c > 0x20 && c < 0x7f && c != '\\' && c != '+' && c != '=' {
			b.WriteRune(c)
		} else {
			fmt.Fprintf(&b, `\x{%x}`, c)
		}
	}
	return b.String()
}

// IsInternational returns if this is an internationalized local part, i.e. has
// non-ASCII characters.
func (lp Localpart) IsInternational() bool {
	for _, c := range lp {
		if c > 0x7f {
			return true
		}
	}
	return false
}
100
// Address is a parsed email address.
type Address struct {
	// Localpart is the decoded part before the "@".
	Localpart Localpart
	Domain    dns.Domain // todo: shouldn't we accept an ip address here too? and merge this type into smtp.Path.
}
106
107// NewAddress returns an address.
108func NewAddress(localpart Localpart, domain dns.Domain) Address {
109 return Address{localpart, domain}
110}
111
112func (a Address) Path() Path {
113 return Path{Localpart: a.Localpart, IPDomain: dns.IPDomain{Domain: a.Domain}}
114}
115
116func (a Address) IsZero() bool {
117 return a == Address{}
118}
119
120// Pack returns the address in string form. If smtputf8 is true, the domain is
121// formatted with non-ASCII characters. If localpart has non-ASCII characters,
122// they are returned regardless of smtputf8.
123func (a Address) Pack(smtputf8 bool) string {
124 if a.IsZero() {
125 return ""
126 }
127 return a.Localpart.String() + "@" + a.Domain.XName(smtputf8)
128}
129
130// String returns the address in string form with non-ASCII characters.
131func (a Address) String() string {
132 if a.IsZero() {
133 return ""
134 }
135 return a.Localpart.String() + "@" + a.Domain.Name()
136}
137
138// LogString returns the address with with utf-8 in localpart and/or domain. In
139// case of an IDNA domain and/or quotable characters in the localpart, an address
140// with quoted/escaped localpart and ASCII domain is also returned.
141func (a Address) LogString() string {
142 if a.IsZero() {
143 return ""
144 }
145 s := a.Pack(true)
146 lp := a.Localpart.String()
147 qlp := strconv.QuoteToASCII(lp)
148 escaped := qlp != `"`+lp+`"`
149 if a.Domain.Unicode != "" || escaped {
150 if escaped {
151 lp = qlp
152 }
153 s += "/" + lp + "@" + a.Domain.ASCII
154 }
155 return s
156}
157
158// ParseAddress parses an email address. UTF-8 is allowed.
159// Returns ErrBadAddress for invalid addresses.
160func ParseAddress(s string) (address Address, err error) {
161 lp, rem, err := parseLocalPart(s)
162 if err != nil {
163 return Address{}, fmt.Errorf("%w: %s", ErrBadAddress, err)
164 }
165 if !strings.HasPrefix(rem, "@") {
166 return Address{}, fmt.Errorf("%w: expected @", ErrBadAddress)
167 }
168 rem = rem[1:]
169 d, err := dns.ParseDomain(rem)
170 if err != nil {
171 return Address{}, fmt.Errorf("%w: %s", ErrBadAddress, err)
172 }
173 return Address{lp, d}, err
174}
175
// ErrBadLocalpart is the sentinel error wrapped by the errors ParseLocalpart
// returns for an invalid localpart; match it with errors.Is.
var ErrBadLocalpart = errors.New("invalid localpart")
177
178// ParseLocalpart parses the local part.
179// UTF-8 is allowed.
180// Returns ErrBadAddress for invalid addresses.
181func ParseLocalpart(s string) (localpart Localpart, err error) {
182 lp, rem, err := parseLocalPart(s)
183 if err != nil {
184 return "", err
185 }
186 if rem != "" {
187 return "", fmt.Errorf("%w: remaining after localpart: %q", ErrBadLocalpart, rem)
188 }
189 return lp, nil
190}
191
192func parseLocalPart(s string) (localpart Localpart, remain string, err error) {
193 p := &parser{s, 0}
194
195 defer func() {
196 x := recover()
197 if x == nil {
198 return
199 }
200 e, ok := x.(error)
201 if !ok {
202 panic(x)
203 }
204 err = fmt.Errorf("%w: %s", ErrBadLocalpart, e)
205 }()
206
207 lp := p.xlocalpart()
208 return lp, p.remainder(), nil
209}
210
// parser is a cursor over a string being parsed. Methods with an "x" prefix
// panic with an error value on failure.
type parser struct {
	s string // Input being parsed.
	o int    // Byte offset into s of the next unread data.
}

// xerrorf panics with an error built from format and args.
func (p *parser) xerrorf(format string, args ...any) {
	err := fmt.Errorf(format, args...)
	panic(err)
}

// hasPrefix returns whether the unread input starts with s.
func (p *parser) hasPrefix(s string) bool {
	unread := p.s[p.o:]
	return strings.HasPrefix(unread, s)
}

// take consumes s if the unread input starts with it, and returns whether it
// did.
func (p *parser) take(s string) bool {
	if !p.hasPrefix(s) {
		return false
	}
	p.o += len(s)
	return true
}

// xtake consumes s, panicking if the unread input does not start with it.
func (p *parser) xtake(s string) {
	if p.take(s) {
		return
	}
	p.xerrorf("expected %q", s)
}

// empty returns whether all input has been consumed.
func (p *parser) empty() bool {
	return len(p.s) == p.o
}

// xtaken consumes and returns the next n bytes of input.
func (p *parser) xtaken(n int) string {
	end := p.o + n
	taken := p.s[p.o:end]
	p.o = end
	return taken
}

// remainder consumes and returns all unread input.
func (p *parser) remainder() string {
	unread := p.s[p.o:]
	p.o = len(p.s)
	return unread
}
253
254// todo: reduce duplication between implementations: ../smtp/address.go:/xlocalpart ../dkim/parser.go:/xlocalpart ../smtpserver/parse.go:/xlocalpart
255func (p *parser) xlocalpart() Localpart {
256 // ../rfc/5321:2316
257 var s string
258 if p.hasPrefix(`"`) {
259 s = p.xquotedString()
260 } else {
261 s = p.xatom()
262 for p.take(".") {
263 s += "." + p.xatom()
264 }
265 }
266 // In the wild, some services use large localparts for generated (bounce) addresses.
267 if Pedantic && len(s) > 64 || len(s) > 128 {
268 // ../rfc/5321:3486
269 p.xerrorf("localpart longer than 64 octets")
270 }
271 return Localpart(s)
272}
273
274func (p *parser) xquotedString() string {
275 p.xtake(`"`)
276 var s string
277 var esc bool
278 for {
279 c := p.xchar()
280 if esc {
281 if c >= ' ' && c < 0x7f {
282 s += string(c)
283 esc = false
284 continue
285 }
286 p.xerrorf("invalid localpart, bad escaped char %c", c)
287 }
288 if c == '\\' {
289 esc = true
290 continue
291 }
292 if c == '"' {
293 return s
294 }
295 // todo: should we be accepting utf8 for quoted strings?
296 if c >= ' ' && c < 0x7f && c != '\\' && c != '"' || c > 0x7f {
297 s += string(c)
298 continue
299 }
300 p.xerrorf("invalid localpart, invalid character %c", c)
301 }
302}
303
304func (p *parser) xchar() rune {
305 // We are careful to track invalid utf-8 properly.
306 if p.empty() {
307 p.xerrorf("need another character")
308 }
309 var r rune
310 var o int
311 for i, c := range p.s[p.o:] {
312 if i > 0 {
313 o = i
314 break
315 }
316 r = c
317 }
318 if o == 0 {
319 p.o = len(p.s)
320 } else {
321 p.o += o
322 }
323 return r
324}
325
326func (p *parser) takefn1(what string, fn func(c rune, i int) bool) string {
327 if p.empty() {
328 p.xerrorf("need at least one char for %s", what)
329 }
330 for i, c := range p.s[p.o:] {
331 if !fn(c, i) {
332 if i == 0 {
333 p.xerrorf("expected at least one char for %s, got char %c", what, c)
334 }
335 return p.xtaken(i)
336 }
337 }
338 return p.remainder()
339}
340
341func (p *parser) xatom() string {
342 return p.takefn1("atom", func(c rune, i int) bool {
343 switch c {
344 case '!', '#', '$', '%', '&', '\'', '*', '+', '-', '/', '=', '?', '^', '_', '`', '{', '|', '}', '~':
345 return true
346 }
347 return isalphadigit(c) || c > 0x7f
348 })
349}
350
// isalpha returns whether c is an ASCII letter, a-z or A-Z.
func isalpha(c rune) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}
354
// isdigit returns whether c is an ASCII digit, 0-9.
func isdigit(c rune) bool {
	if c < '0' || c > '9' {
		return false
	}
	return true
}
358
359func isalphadigit(c rune) bool {
360 return isalpha(c) || isdigit(c)
361}
362