1package dsn
2
3import (
4 "bufio"
5 "fmt"
6 "io"
7 "log/slog"
8 "net/textproto"
9 "strconv"
10 "strings"
11 "time"
12
13 "github.com/mjl-/mox/dns"
14 "github.com/mjl-/mox/message"
15 "github.com/mjl-/mox/mlog"
16 "github.com/mjl-/mox/smtp"
17 "slices"
18)
19
20// Parse reads a DSN message.
21//
22// A DSN is a multipart internet mail message with 2 or 3 parts: human-readable
23// text, machine-parsable text, and optional original message or headers.
24//
25// The first return value is the machine-parsed DSN message. The second value is
26// the entire MIME multipart message. Use its Parts field to access the
27// human-readable text and optional original message/headers.
28func Parse(elog *slog.Logger, r io.ReaderAt) (*Message, *message.Part, error) {
29 log := mlog.New("dsn", elog)
30
31 // DSNs can mix and match subtypes with and without utf-8. ../rfc/6533:441
32
33 part, err := message.Parse(log.Logger, false, r)
34 if err != nil {
35 return nil, nil, fmt.Errorf("parsing message: %v", err)
36 }
37 if part.MediaType != "MULTIPART" || part.MediaSubType != "REPORT" {
38 return nil, nil, fmt.Errorf(`message has content-type %q, must have "message/report"`, strings.ToLower(part.MediaType+"/"+part.MediaSubType))
39 }
40 err = part.Walk(log.Logger, nil)
41 if err != nil {
42 return nil, nil, fmt.Errorf("parsing message parts: %v", err)
43 }
44 nparts := len(part.Parts)
45 if nparts != 2 && nparts != 3 {
46 return nil, nil, fmt.Errorf("invalid dsn, got %d multipart parts, 2 or 3 required", nparts)
47 }
48 p0 := part.Parts[0]
49 if !(p0.MediaType == "" && p0.MediaSubType == "") && !(p0.MediaType == "TEXT" && p0.MediaSubType == "PLAIN") {
50 return nil, nil, fmt.Errorf(`invalid dsn, first part has content-type %q, must have "text/plain"`, strings.ToLower(p0.MediaType+"/"+p0.MediaSubType))
51 }
52
53 p1 := part.Parts[1]
54 var m *Message
55 if !(p1.MediaType == "MESSAGE" && (p1.MediaSubType == "DELIVERY-STATUS" || p1.MediaSubType == "GLOBAL-DELIVERY-STATUS")) {
56 return nil, nil, fmt.Errorf(`invalid dsn, second part has content-type %q, must have "message/delivery-status" or "message/global-delivery-status"`, strings.ToLower(p1.MediaType+"/"+p1.MediaSubType))
57 }
58 utf8 := p1.MediaSubType == "GLOBAL-DELIVERY-STATUS"
59 m, err = Decode(p1.Reader(), utf8)
60 if err != nil {
61 return nil, nil, fmt.Errorf("parsing dsn delivery-status part: %v", err)
62 }
63
64 addressPath := func(a message.Address) (smtp.Path, error) {
65 d, err := dns.ParseDomain(a.Host)
66 if err != nil {
67 return smtp.Path{}, fmt.Errorf("parsing domain: %v", err)
68 }
69 lp, err := smtp.ParseLocalpart(a.User)
70 if err != nil {
71 return smtp.Path{}, fmt.Errorf("parsing localpart: %v", err)
72 }
73 return smtp.Path{Localpart: lp, IPDomain: dns.IPDomain{Domain: d}}, nil
74 }
75 if len(part.Envelope.From) == 1 {
76 m.From, err = addressPath(part.Envelope.From[0])
77 if err != nil {
78 return nil, nil, fmt.Errorf("parsing From-header: %v", err)
79 }
80 }
81 if len(part.Envelope.To) == 1 {
82 m.To, err = addressPath(part.Envelope.To[0])
83 if err != nil {
84 return nil, nil, fmt.Errorf("parsing To-header: %v", err)
85 }
86 }
87 m.Subject = part.Envelope.Subject
88 buf, err := io.ReadAll(p0.ReaderUTF8OrBinary())
89 if err != nil {
90 return nil, nil, fmt.Errorf("reading human-readable text part: %v", err)
91 }
92 m.TextBody = strings.ReplaceAll(string(buf), "\r\n", "\n")
93
94 if nparts == 2 {
95 return m, &part, nil
96 }
97
98 p2 := part.Parts[2]
99 ct := strings.ToLower(p2.MediaType + "/" + p2.MediaSubType)
100 switch ct {
101 case "text/rfc822-headers":
102 case "message/global-headers":
103 case "message/rfc822":
104 case "message/global":
105 default:
106 return nil, nil, fmt.Errorf("invalid content-type %q for optional third part with original message/headers", ct)
107 }
108
109 return m, &part, nil
110}
111
112// Decode parses the (global) delivery-status part of a DSN.
113//
114// utf8 indicates if UTF-8 is allowed for this message, if used by the media
115// subtype of the message parts.
116func Decode(r io.Reader, utf8 bool) (*Message, error) {
117 m := Message{SMTPUTF8: utf8}
118
119 // We are using textproto.Reader to read mime headers. It requires a header section ending in \r\n.
120 // ../rfc/3464:486
121 b := bufio.NewReader(io.MultiReader(r, strings.NewReader("\r\n")))
122 mr := textproto.NewReader(b)
123
124 // Read per-message lines.
125 // ../rfc/3464:1522 ../rfc/6533:366
126 msgh, err := mr.ReadMIMEHeader()
127 if err != nil {
128 return nil, fmt.Errorf("reading per-message lines: %v", err)
129 }
130 for k, l := range msgh {
131 if len(l) != 1 {
132 return nil, fmt.Errorf("multiple values for %q: %v", k, l)
133 }
134 v := l[0]
135 // note: headers are in canonical form, as parsed by textproto.
136 switch k {
137 case "Original-Envelope-Id":
138 m.OriginalEnvelopeID = v
139 case "Reporting-Mta":
140 mta, err := parseMTA(v, utf8)
141 if err != nil {
142 return nil, fmt.Errorf("parsing reporting-mta: %v", err)
143 }
144 m.ReportingMTA = mta
145 case "Dsn-Gateway":
146 mta, err := parseMTA(v, utf8)
147 if err != nil {
148 return nil, fmt.Errorf("parsing dsn-gateway: %v", err)
149 }
150 m.DSNGateway = mta
151 case "Received-From-Mta":
152 mta, err := parseMTA(v, utf8)
153 if err != nil {
154 return nil, fmt.Errorf("parsing received-from-mta: %v", err)
155 }
156 d, err := dns.ParseDomain(mta)
157 if err != nil {
158 return nil, fmt.Errorf("parsing received-from-mta domain %q: %v", mta, err)
159 }
160 m.ReceivedFromMTA = smtp.Ehlo{Name: dns.IPDomain{Domain: d}}
161 case "Arrival-Date":
162 tm, err := parseDateTime(v)
163 if err != nil {
164 return nil, fmt.Errorf("parsing arrival-date: %v", err)
165 }
166 m.ArrivalDate = tm
167 default:
168 // We'll assume it is an extension field, we'll ignore it for now.
169 }
170 }
171 m.MessageHeader = msgh
172
173 required := []string{"Reporting-Mta"}
174 for _, req := range required {
175 if _, ok := msgh[req]; !ok {
176 return nil, fmt.Errorf("missing required recipient field %q", req)
177 }
178 }
179
180 rh, err := parseRecipientHeader(mr, utf8)
181 if err != nil {
182 return nil, fmt.Errorf("reading per-recipient header: %v", err)
183 }
184 m.Recipients = []Recipient{rh}
185 for {
186 if _, err := b.Peek(1); err == io.EOF {
187 break
188 }
189 rh, err := parseRecipientHeader(mr, utf8)
190 if err != nil {
191 return nil, fmt.Errorf("reading another per-recipient header: %v", err)
192 }
193 m.Recipients = append(m.Recipients, rh)
194 }
195 return &m, nil
196}
197
198// ../rfc/3464:1530 ../rfc/6533:370
199func parseRecipientHeader(mr *textproto.Reader, utf8 bool) (Recipient, error) {
200 var r Recipient
201 h, err := mr.ReadMIMEHeader()
202 if err != nil {
203 return Recipient{}, err
204 }
205
206 for k, l := range h {
207 if len(l) != 1 {
208 return Recipient{}, fmt.Errorf("multiple values for %q: %v", k, l)
209 }
210 v := l[0]
211 // note: headers are in canonical form, as parsed by textproto.
212 var err error
213 switch k {
214 case "Original-Recipient":
215 r.OriginalRecipient, err = parseAddress(v, utf8)
216 case "Final-Recipient":
217 r.FinalRecipient, err = parseAddress(v, utf8)
218 case "Action":
219 a := Action(strings.ToLower(v))
220 actions := []Action{Failed, Delayed, Delivered, Relayed, Expanded}
221 if slices.Contains(actions, a) {
222 r.Action = a
223 } else {
224 err = fmt.Errorf("unrecognized action %q", v)
225 }
226 case "Status":
227 // todo: parse the enhanced status code?
228 r.Status = v
229 t := strings.SplitN(v, "(", 2)
230 v = strings.TrimSpace(v)
231 if len(t) == 2 && strings.HasSuffix(v, ")") {
232 r.Status = strings.TrimSpace(t[0])
233 r.StatusComment = strings.TrimSpace(strings.TrimSuffix(t[1], ")"))
234 }
235
236 case "Remote-Mta":
237 r.RemoteMTA = NameIP{Name: v}
238 case "Diagnostic-Code":
239 // ../rfc/3464:518
240 t := strings.SplitN(v, ";", 2)
241 dt := strings.TrimSpace(t[0])
242 if strings.ToLower(dt) != "smtp" {
243 err = fmt.Errorf("unknown diagnostic-type %q, expected smtp", dt)
244 } else if len(t) != 2 {
245 err = fmt.Errorf("missing semicolon to separate diagnostic-type from code")
246 } else {
247 r.DiagnosticCodeSMTP = strings.TrimSpace(t[1])
248 }
249 case "Last-Attempt-Date":
250 r.LastAttemptDate, err = parseDateTime(v)
251 case "Final-Log-Id":
252 r.FinalLogID = v
253 case "Will-Retry-Until":
254 tm, err := parseDateTime(v)
255 if err == nil {
256 r.WillRetryUntil = &tm
257 }
258 default:
259 // todo future: parse localized diagnostic text field?
260 // We'll assume it is an extension field, we'll ignore it for now.
261 }
262 if err != nil {
263 return Recipient{}, fmt.Errorf("parsing field %q %q: %v", k, v, err)
264 }
265 }
266
267 required := []string{"Final-Recipient", "Action", "Status"}
268 for _, req := range required {
269 if _, ok := h[req]; !ok {
270 return Recipient{}, fmt.Errorf("missing required recipient field %q", req)
271 }
272 }
273
274 r.Header = h
275 return r, nil
276}
277
278// ../rfc/3464:525
279func parseMTA(s string, utf8 bool) (string, error) {
280 s = removeComments(s)
281 t := strings.SplitN(s, ";", 2)
282 if len(t) != 2 {
283 return "", fmt.Errorf("missing semicolon that splits type and name")
284 }
285 k := strings.TrimSpace(t[0])
286 if !strings.EqualFold(k, "dns") {
287 return "", fmt.Errorf("unknown type %q, expected dns", k)
288 }
289 return strings.TrimSpace(t[1]), nil
290}
291
292func parseDateTime(s string) (time.Time, error) {
293 s = removeComments(s)
294 return time.Parse(message.RFC5322Z, s)
295}
296
297func parseAddress(s string, utf8 bool) (smtp.Path, error) {
298 s = removeComments(s)
299 t := strings.SplitN(s, ";", 2)
300 // ../rfc/3464:513 ../rfc/6533:250
301 addrType := strings.ToLower(strings.TrimSpace(t[0]))
302 if len(t) != 2 {
303 return smtp.Path{}, fmt.Errorf("missing semicolon that splits address type and address")
304 } else if addrType == "utf-8" {
305 if !utf8 {
306 return smtp.Path{}, fmt.Errorf("utf-8 address type for non-utf-8 dsn")
307 }
308 } else if addrType != "rfc822" {
309 return smtp.Path{}, fmt.Errorf("unrecognized address type %q, expected rfc822", addrType)
310 }
311 s = strings.TrimSpace(t[1])
312 if !utf8 {
313 for _, c := range s {
314 if c > 0x7f {
315 return smtp.Path{}, fmt.Errorf("non-ascii without utf-8 enabled")
316 }
317 }
318 }
319 // todo: more proper parser
320 t = strings.Split(s, "@")
321 if len(t) == 1 {
322 return smtp.Path{}, fmt.Errorf("invalid email address")
323 }
324 d, err := dns.ParseDomain(t[len(t)-1])
325 if err != nil {
326 return smtp.Path{}, fmt.Errorf("parsing domain: %v", err)
327 }
328 var lp string
329 var esc string
330 lead := strings.Join(t[:len(t)-1], "@")
331 for _, c := range lead {
332 if esc == "" && c == '\\' || esc == `\` && (c == 'x' || c == 'X') || esc == `\x` && c == '{' {
333 if c == 'X' {
334 c = 'x'
335 }
336 esc += string(c)
337 } else if strings.HasPrefix(esc, `\x{`) {
338 if c == '}' {
339 c, err := strconv.ParseInt(esc[3:], 16, 32)
340 if err != nil {
341 return smtp.Path{}, fmt.Errorf("parsing localpart with hexpoint: %v", err)
342 }
343 lp += string(rune(c))
344 esc = ""
345 } else {
346 esc += string(c)
347 }
348 } else {
349 lp += string(c)
350 }
351 }
352 if esc != "" {
353 return smtp.Path{}, fmt.Errorf("parsing localpart: unfinished embedded unicode char")
354 }
355 localpart, err := smtp.ParseLocalpart(lp)
356 if err != nil {
357 return smtp.Path{}, fmt.Errorf("parsing localpart: %v", err)
358 }
359 p := smtp.Path{Localpart: localpart, IPDomain: dns.IPDomain{Domain: d}}
360 return p, nil
361}
362
// removeComments returns s without comments, i.e. without text enclosed in
// parentheses, including the parentheses themselves. Comments can be nested.
//
// A closing parenthesis without matching opening parenthesis is kept as is. An
// opening parenthesis that is never closed removes the rest of the string.
// Quoting and backslash escapes are not interpreted.
func removeComments(s string) string {
	var b strings.Builder
	b.Grow(len(s))
	depth := 0 // Nesting level of the comment we are in, 0 when outside comments.
	for _, c := range s {
		switch {
		case c == '(':
			depth++
		case c == ')' && depth > 0:
			depth--
		case depth == 0:
			b.WriteRune(c)
		}
	}
	return b.String()
}
377