1package dsn
2
3import (
4 "bufio"
5 "fmt"
6 "io"
7 "log/slog"
8 "net/textproto"
9 "strconv"
10 "strings"
11 "time"
12
13 "github.com/mjl-/mox/dns"
14 "github.com/mjl-/mox/message"
15 "github.com/mjl-/mox/mlog"
16 "github.com/mjl-/mox/smtp"
17)
18
19// Parse reads a DSN message.
20//
21// A DSN is a multipart internet mail message with 2 or 3 parts: human-readable
22// text, machine-parsable text, and optional original message or headers.
23//
24// The first return value is the machine-parsed DSN message. The second value is
25// the entire MIME multipart message. Use its Parts field to access the
26// human-readable text and optional original message/headers.
27func Parse(elog *slog.Logger, r io.ReaderAt) (*Message, *message.Part, error) {
28 log := mlog.New("dsn", elog)
29
30 // DSNs can mix and match subtypes with and without utf-8. ../rfc/6533:441
31
32 part, err := message.Parse(log.Logger, false, r)
33 if err != nil {
34 return nil, nil, fmt.Errorf("parsing message: %v", err)
35 }
36 if part.MediaType != "MULTIPART" || part.MediaSubType != "REPORT" {
37 return nil, nil, fmt.Errorf(`message has content-type %q, must have "message/report"`, strings.ToLower(part.MediaType+"/"+part.MediaSubType))
38 }
39 err = part.Walk(log.Logger, nil)
40 if err != nil {
41 return nil, nil, fmt.Errorf("parsing message parts: %v", err)
42 }
43 nparts := len(part.Parts)
44 if nparts != 2 && nparts != 3 {
45 return nil, nil, fmt.Errorf("invalid dsn, got %d multipart parts, 2 or 3 required", nparts)
46 }
47 p0 := part.Parts[0]
48 if !(p0.MediaType == "" && p0.MediaSubType == "") && !(p0.MediaType == "TEXT" && p0.MediaSubType == "PLAIN") {
49 return nil, nil, fmt.Errorf(`invalid dsn, first part has content-type %q, must have "text/plain"`, strings.ToLower(p0.MediaType+"/"+p0.MediaSubType))
50 }
51
52 p1 := part.Parts[1]
53 var m *Message
54 if !(p1.MediaType == "MESSAGE" && (p1.MediaSubType == "DELIVERY-STATUS" || p1.MediaSubType == "GLOBAL-DELIVERY-STATUS")) {
55 return nil, nil, fmt.Errorf(`invalid dsn, second part has content-type %q, must have "message/delivery-status" or "message/global-delivery-status"`, strings.ToLower(p1.MediaType+"/"+p1.MediaSubType))
56 }
57 utf8 := p1.MediaSubType == "GLOBAL-DELIVERY-STATUS"
58 m, err = Decode(p1.Reader(), utf8)
59 if err != nil {
60 return nil, nil, fmt.Errorf("parsing dsn delivery-status part: %v", err)
61 }
62
63 addressPath := func(a message.Address) (smtp.Path, error) {
64 d, err := dns.ParseDomain(a.Host)
65 if err != nil {
66 return smtp.Path{}, fmt.Errorf("parsing domain: %v", err)
67 }
68 lp, err := smtp.ParseLocalpart(a.User)
69 if err != nil {
70 return smtp.Path{}, fmt.Errorf("parsing localpart: %v", err)
71 }
72 return smtp.Path{Localpart: lp, IPDomain: dns.IPDomain{Domain: d}}, nil
73 }
74 if len(part.Envelope.From) == 1 {
75 m.From, err = addressPath(part.Envelope.From[0])
76 if err != nil {
77 return nil, nil, fmt.Errorf("parsing From-header: %v", err)
78 }
79 }
80 if len(part.Envelope.To) == 1 {
81 m.To, err = addressPath(part.Envelope.To[0])
82 if err != nil {
83 return nil, nil, fmt.Errorf("parsing To-header: %v", err)
84 }
85 }
86 m.Subject = part.Envelope.Subject
87 buf, err := io.ReadAll(p0.ReaderUTF8OrBinary())
88 if err != nil {
89 return nil, nil, fmt.Errorf("reading human-readable text part: %v", err)
90 }
91 m.TextBody = strings.ReplaceAll(string(buf), "\r\n", "\n")
92
93 if nparts == 2 {
94 return m, &part, nil
95 }
96
97 p2 := part.Parts[2]
98 ct := strings.ToLower(p2.MediaType + "/" + p2.MediaSubType)
99 switch ct {
100 case "text/rfc822-headers":
101 case "message/global-headers":
102 case "message/rfc822":
103 case "message/global":
104 default:
105 return nil, nil, fmt.Errorf("invalid content-type %q for optional third part with original message/headers", ct)
106 }
107
108 return m, &part, nil
109}
110
111// Decode parses the (global) delivery-status part of a DSN.
112//
113// utf8 indicates if UTF-8 is allowed for this message, if used by the media
114// subtype of the message parts.
115func Decode(r io.Reader, utf8 bool) (*Message, error) {
116 m := Message{SMTPUTF8: utf8}
117
118 // We are using textproto.Reader to read mime headers. It requires a header section ending in \r\n.
119 // ../rfc/3464:486
120 b := bufio.NewReader(io.MultiReader(r, strings.NewReader("\r\n")))
121 mr := textproto.NewReader(b)
122
123 // Read per-message lines.
124 // ../rfc/3464:1522 ../rfc/6533:366
125 msgh, err := mr.ReadMIMEHeader()
126 if err != nil {
127 return nil, fmt.Errorf("reading per-message lines: %v", err)
128 }
129 for k, l := range msgh {
130 if len(l) != 1 {
131 return nil, fmt.Errorf("multiple values for %q: %v", k, l)
132 }
133 v := l[0]
134 // note: headers are in canonical form, as parsed by textproto.
135 switch k {
136 case "Original-Envelope-Id":
137 m.OriginalEnvelopeID = v
138 case "Reporting-Mta":
139 mta, err := parseMTA(v, utf8)
140 if err != nil {
141 return nil, fmt.Errorf("parsing reporting-mta: %v", err)
142 }
143 m.ReportingMTA = mta
144 case "Dsn-Gateway":
145 mta, err := parseMTA(v, utf8)
146 if err != nil {
147 return nil, fmt.Errorf("parsing dsn-gateway: %v", err)
148 }
149 m.DSNGateway = mta
150 case "Received-From-Mta":
151 mta, err := parseMTA(v, utf8)
152 if err != nil {
153 return nil, fmt.Errorf("parsing received-from-mta: %v", err)
154 }
155 d, err := dns.ParseDomain(mta)
156 if err != nil {
157 return nil, fmt.Errorf("parsing received-from-mta domain %q: %v", mta, err)
158 }
159 m.ReceivedFromMTA = smtp.Ehlo{Name: dns.IPDomain{Domain: d}}
160 case "Arrival-Date":
161 tm, err := parseDateTime(v)
162 if err != nil {
163 return nil, fmt.Errorf("parsing arrival-date: %v", err)
164 }
165 m.ArrivalDate = tm
166 default:
167 // We'll assume it is an extension field, we'll ignore it for now.
168 }
169 }
170 m.MessageHeader = msgh
171
172 required := []string{"Reporting-Mta"}
173 for _, req := range required {
174 if _, ok := msgh[req]; !ok {
175 return nil, fmt.Errorf("missing required recipient field %q", req)
176 }
177 }
178
179 rh, err := parseRecipientHeader(mr, utf8)
180 if err != nil {
181 return nil, fmt.Errorf("reading per-recipient header: %v", err)
182 }
183 m.Recipients = []Recipient{rh}
184 for {
185 if _, err := b.Peek(1); err == io.EOF {
186 break
187 }
188 rh, err := parseRecipientHeader(mr, utf8)
189 if err != nil {
190 return nil, fmt.Errorf("reading another per-recipient header: %v", err)
191 }
192 m.Recipients = append(m.Recipients, rh)
193 }
194 return &m, nil
195}
196
197// ../rfc/3464:1530 ../rfc/6533:370
198func parseRecipientHeader(mr *textproto.Reader, utf8 bool) (Recipient, error) {
199 var r Recipient
200 h, err := mr.ReadMIMEHeader()
201 if err != nil {
202 return Recipient{}, err
203 }
204
205 for k, l := range h {
206 if len(l) != 1 {
207 return Recipient{}, fmt.Errorf("multiple values for %q: %v", k, l)
208 }
209 v := l[0]
210 // note: headers are in canonical form, as parsed by textproto.
211 var err error
212 switch k {
213 case "Original-Recipient":
214 r.OriginalRecipient, err = parseAddress(v, utf8)
215 case "Final-Recipient":
216 r.FinalRecipient, err = parseAddress(v, utf8)
217 case "Action":
218 a := Action(strings.ToLower(v))
219 actions := []Action{Failed, Delayed, Delivered, Relayed, Expanded}
220 var ok bool
221 for _, x := range actions {
222 if a == x {
223 ok = true
224 r.Action = a
225 break
226 }
227 }
228 if !ok {
229 err = fmt.Errorf("unrecognized action %q", v)
230 }
231 case "Status":
232 // todo: parse the enhanced status code?
233 r.Status = v
234 t := strings.SplitN(v, "(", 2)
235 v = strings.TrimSpace(v)
236 if len(t) == 2 && strings.HasSuffix(v, ")") {
237 r.Status = strings.TrimSpace(t[0])
238 r.StatusComment = strings.TrimSpace(strings.TrimSuffix(t[1], ")"))
239 }
240
241 case "Remote-Mta":
242 r.RemoteMTA = NameIP{Name: v}
243 case "Diagnostic-Code":
244 // ../rfc/3464:518
245 t := strings.SplitN(v, ";", 2)
246 dt := strings.TrimSpace(t[0])
247 if strings.ToLower(dt) != "smtp" {
248 err = fmt.Errorf("unknown diagnostic-type %q, expected smtp", dt)
249 } else if len(t) != 2 {
250 err = fmt.Errorf("missing semicolon to separate diagnostic-type from code")
251 } else {
252 r.DiagnosticCodeSMTP = strings.TrimSpace(t[1])
253 }
254 case "Last-Attempt-Date":
255 r.LastAttemptDate, err = parseDateTime(v)
256 case "Final-Log-Id":
257 r.FinalLogID = v
258 case "Will-Retry-Until":
259 tm, err := parseDateTime(v)
260 if err == nil {
261 r.WillRetryUntil = &tm
262 }
263 default:
264 // todo future: parse localized diagnostic text field?
265 // We'll assume it is an extension field, we'll ignore it for now.
266 }
267 if err != nil {
268 return Recipient{}, fmt.Errorf("parsing field %q %q: %v", k, v, err)
269 }
270 }
271
272 required := []string{"Final-Recipient", "Action", "Status"}
273 for _, req := range required {
274 if _, ok := h[req]; !ok {
275 return Recipient{}, fmt.Errorf("missing required recipient field %q", req)
276 }
277 }
278
279 r.Header = h
280 return r, nil
281}
282
283// ../rfc/3464:525
284func parseMTA(s string, utf8 bool) (string, error) {
285 s = removeComments(s)
286 t := strings.SplitN(s, ";", 2)
287 if len(t) != 2 {
288 return "", fmt.Errorf("missing semicolon that splits type and name")
289 }
290 k := strings.TrimSpace(t[0])
291 if !strings.EqualFold(k, "dns") {
292 return "", fmt.Errorf("unknown type %q, expected dns", k)
293 }
294 return strings.TrimSpace(t[1]), nil
295}
296
297func parseDateTime(s string) (time.Time, error) {
298 s = removeComments(s)
299 return time.Parse(message.RFC5322Z, s)
300}
301
302func parseAddress(s string, utf8 bool) (smtp.Path, error) {
303 s = removeComments(s)
304 t := strings.SplitN(s, ";", 2)
305 // ../rfc/3464:513 ../rfc/6533:250
306 addrType := strings.ToLower(strings.TrimSpace(t[0]))
307 if len(t) != 2 {
308 return smtp.Path{}, fmt.Errorf("missing semicolon that splits address type and address")
309 } else if addrType == "utf-8" {
310 if !utf8 {
311 return smtp.Path{}, fmt.Errorf("utf-8 address type for non-utf-8 dsn")
312 }
313 } else if addrType != "rfc822" {
314 return smtp.Path{}, fmt.Errorf("unrecognized address type %q, expected rfc822", addrType)
315 }
316 s = strings.TrimSpace(t[1])
317 if !utf8 {
318 for _, c := range s {
319 if c > 0x7f {
320 return smtp.Path{}, fmt.Errorf("non-ascii without utf-8 enabled")
321 }
322 }
323 }
324 // todo: more proper parser
325 t = strings.Split(s, "@")
326 if len(t) == 1 {
327 return smtp.Path{}, fmt.Errorf("invalid email address")
328 }
329 d, err := dns.ParseDomain(t[len(t)-1])
330 if err != nil {
331 return smtp.Path{}, fmt.Errorf("parsing domain: %v", err)
332 }
333 var lp string
334 var esc string
335 lead := strings.Join(t[:len(t)-1], "@")
336 for _, c := range lead {
337 if esc == "" && c == '\\' || esc == `\` && (c == 'x' || c == 'X') || esc == `\x` && c == '{' {
338 if c == 'X' {
339 c = 'x'
340 }
341 esc += string(c)
342 } else if strings.HasPrefix(esc, `\x{`) {
343 if c == '}' {
344 c, err := strconv.ParseInt(esc[3:], 16, 32)
345 if err != nil {
346 return smtp.Path{}, fmt.Errorf("parsing localpart with hexpoint: %v", err)
347 }
348 lp += string(rune(c))
349 esc = ""
350 } else {
351 esc += string(c)
352 }
353 } else {
354 lp += string(c)
355 }
356 }
357 if esc != "" {
358 return smtp.Path{}, fmt.Errorf("parsing localpart: unfinished embedded unicode char")
359 }
360 localpart, err := smtp.ParseLocalpart(lp)
361 if err != nil {
362 return smtp.Path{}, fmt.Errorf("parsing localpart: %v", err)
363 }
364 p := smtp.Path{Localpart: localpart, IPDomain: dns.IPDomain{Domain: d}}
365 return p, nil
366}
367
// removeComments returns s without parenthesized comments, which may be nested.
//
// A ")" without a preceding unclosed "(" is kept as regular text. For a "("
// that is never closed, the remainder of s is dropped. Quoting and backslash
// escapes are not interpreted.
func removeComments(s string) string {
	// Use a builder: repeated string concatenation is quadratic in the length of
	// the value.
	var b strings.Builder
	b.Grow(len(s))
	depth := 0 // Nesting depth of comments at the current position.
	for _, c := range s {
		switch {
		case c == '(':
			depth++
		case c == ')' && depth > 0:
			depth--
		case depth == 0:
			b.WriteRune(c)
		}
	}
	return b.String()
}
382