1package dsn
2
3import (
4 "bufio"
5 "fmt"
6 "io"
7 "net/textproto"
8 "strconv"
9 "strings"
10 "time"
11
12 "golang.org/x/exp/slog"
13
14 "github.com/mjl-/mox/dns"
15 "github.com/mjl-/mox/message"
16 "github.com/mjl-/mox/mlog"
17 "github.com/mjl-/mox/smtp"
18)
19
20// Parse reads a DSN message.
21//
22// A DSN is a multipart internet mail message with 2 or 3 parts: human-readable
23// text, machine-parsable text, and optional original message or headers.
24//
25// The first return value is the machine-parsed DSN message. The second value is
26// the entire MIME multipart message. Use its Parts field to access the
27// human-readable text and optional original message/headers.
28func Parse(elog *slog.Logger, r io.ReaderAt) (*Message, *message.Part, error) {
29 log := mlog.New("dsn", elog)
30
31 // DSNs can mix and match subtypes with and without utf-8. ../rfc/6533:441
32
33 part, err := message.Parse(log.Logger, false, r)
34 if err != nil {
35 return nil, nil, fmt.Errorf("parsing message: %v", err)
36 }
37 if part.MediaType != "MULTIPART" || part.MediaSubType != "REPORT" {
38 return nil, nil, fmt.Errorf(`message has content-type %q, must have "message/report"`, strings.ToLower(part.MediaType+"/"+part.MediaSubType))
39 }
40 err = part.Walk(log.Logger, nil)
41 if err != nil {
42 return nil, nil, fmt.Errorf("parsing message parts: %v", err)
43 }
44 nparts := len(part.Parts)
45 if nparts != 2 && nparts != 3 {
46 return nil, nil, fmt.Errorf("invalid dsn, got %d multipart parts, 2 or 3 required", nparts)
47 }
48 p0 := part.Parts[0]
49 if !(p0.MediaType == "" && p0.MediaSubType == "") && !(p0.MediaType == "TEXT" && p0.MediaSubType == "PLAIN") {
50 return nil, nil, fmt.Errorf(`invalid dsn, first part has content-type %q, must have "text/plain"`, strings.ToLower(p0.MediaType+"/"+p0.MediaSubType))
51 }
52
53 p1 := part.Parts[1]
54 var m *Message
55 if !(p1.MediaType == "MESSAGE" && (p1.MediaSubType == "DELIVERY-STATUS" || p1.MediaSubType == "GLOBAL-DELIVERY-STATUS")) {
56 return nil, nil, fmt.Errorf(`invalid dsn, second part has content-type %q, must have "message/delivery-status" or "message/global-delivery-status"`, strings.ToLower(p1.MediaType+"/"+p1.MediaSubType))
57 }
58 utf8 := p1.MediaSubType == "GLOBAL-DELIVERY-STATUS"
59 m, err = Decode(p1.Reader(), utf8)
60 if err != nil {
61 return nil, nil, fmt.Errorf("parsing dsn delivery-status part: %v", err)
62 }
63
64 addressPath := func(a message.Address) (smtp.Path, error) {
65 d, err := dns.ParseDomain(a.Host)
66 if err != nil {
67 return smtp.Path{}, fmt.Errorf("parsing domain: %v", err)
68 }
69 return smtp.Path{Localpart: smtp.Localpart(a.User), IPDomain: dns.IPDomain{Domain: d}}, nil
70 }
71 if len(part.Envelope.From) == 1 {
72 m.From, err = addressPath(part.Envelope.From[0])
73 if err != nil {
74 return nil, nil, fmt.Errorf("parsing From-header: %v", err)
75 }
76 }
77 if len(part.Envelope.To) == 1 {
78 m.To, err = addressPath(part.Envelope.To[0])
79 if err != nil {
80 return nil, nil, fmt.Errorf("parsing To-header: %v", err)
81 }
82 }
83 m.Subject = part.Envelope.Subject
84 buf, err := io.ReadAll(p0.ReaderUTF8OrBinary())
85 if err != nil {
86 return nil, nil, fmt.Errorf("reading human-readable text part: %v", err)
87 }
88 m.TextBody = strings.ReplaceAll(string(buf), "\r\n", "\n")
89
90 if nparts == 2 {
91 return m, &part, nil
92 }
93
94 p2 := part.Parts[2]
95 ct := strings.ToLower(p2.MediaType + "/" + p2.MediaSubType)
96 switch ct {
97 case "text/rfc822-headers":
98 case "message/global-headers":
99 case "message/rfc822":
100 case "message/global":
101 default:
102 return nil, nil, fmt.Errorf("invalid content-type %q for optional third part with original message/headers", ct)
103 }
104
105 return m, &part, nil
106}
107
108// Decode parses the (global) delivery-status part of a DSN.
109//
110// utf8 indicates if UTF-8 is allowed for this message, if used by the media
111// subtype of the message parts.
112func Decode(r io.Reader, utf8 bool) (*Message, error) {
113 m := Message{SMTPUTF8: utf8}
114
115 // We are using textproto.Reader to read mime headers. It requires a header section ending in \r\n.
116 // ../rfc/3464:486
117 b := bufio.NewReader(io.MultiReader(r, strings.NewReader("\r\n")))
118 mr := textproto.NewReader(b)
119
120 // Read per-message lines.
121 // ../rfc/3464:1522 ../rfc/6533:366
122 msgh, err := mr.ReadMIMEHeader()
123 if err != nil {
124 return nil, fmt.Errorf("reading per-message lines: %v", err)
125 }
126 for k, l := range msgh {
127 if len(l) != 1 {
128 return nil, fmt.Errorf("multiple values for %q: %v", k, l)
129 }
130 v := l[0]
131 // note: headers are in canonical form, as parsed by textproto.
132 switch k {
133 case "Original-Envelope-Id":
134 m.OriginalEnvelopeID = v
135 case "Reporting-Mta":
136 mta, err := parseMTA(v, utf8)
137 if err != nil {
138 return nil, fmt.Errorf("parsing reporting-mta: %v", err)
139 }
140 m.ReportingMTA = mta
141 case "Dsn-Gateway":
142 mta, err := parseMTA(v, utf8)
143 if err != nil {
144 return nil, fmt.Errorf("parsing dsn-gateway: %v", err)
145 }
146 m.DSNGateway = mta
147 case "Received-From-Mta":
148 mta, err := parseMTA(v, utf8)
149 if err != nil {
150 return nil, fmt.Errorf("parsing received-from-mta: %v", err)
151 }
152 d, err := dns.ParseDomain(mta)
153 if err != nil {
154 return nil, fmt.Errorf("parsing received-from-mta domain %q: %v", mta, err)
155 }
156 m.ReceivedFromMTA = smtp.Ehlo{Name: dns.IPDomain{Domain: d}}
157 case "Arrival-Date":
158 tm, err := parseDateTime(v)
159 if err != nil {
160 return nil, fmt.Errorf("parsing arrival-date: %v", err)
161 }
162 m.ArrivalDate = tm
163 default:
164 // We'll assume it is an extension field, we'll ignore it for now.
165 }
166 }
167 m.MessageHeader = msgh
168
169 required := []string{"Reporting-Mta"}
170 for _, req := range required {
171 if _, ok := msgh[req]; !ok {
172 return nil, fmt.Errorf("missing required recipient field %q", req)
173 }
174 }
175
176 rh, err := parseRecipientHeader(mr, utf8)
177 if err != nil {
178 return nil, fmt.Errorf("reading per-recipient header: %v", err)
179 }
180 m.Recipients = []Recipient{rh}
181 for {
182 if _, err := b.Peek(1); err == io.EOF {
183 break
184 }
185 rh, err := parseRecipientHeader(mr, utf8)
186 if err != nil {
187 return nil, fmt.Errorf("reading another per-recipient header: %v", err)
188 }
189 m.Recipients = append(m.Recipients, rh)
190 }
191 return &m, nil
192}
193
194// ../rfc/3464:1530 ../rfc/6533:370
195func parseRecipientHeader(mr *textproto.Reader, utf8 bool) (Recipient, error) {
196 var r Recipient
197 h, err := mr.ReadMIMEHeader()
198 if err != nil {
199 return Recipient{}, err
200 }
201
202 for k, l := range h {
203 if len(l) != 1 {
204 return Recipient{}, fmt.Errorf("multiple values for %q: %v", k, l)
205 }
206 v := l[0]
207 // note: headers are in canonical form, as parsed by textproto.
208 var err error
209 switch k {
210 case "Original-Recipient":
211 r.OriginalRecipient, err = parseAddress(v, utf8)
212 case "Final-Recipient":
213 r.FinalRecipient, err = parseAddress(v, utf8)
214 case "Action":
215 a := Action(strings.ToLower(v))
216 actions := []Action{Failed, Delayed, Delivered, Relayed, Expanded}
217 var ok bool
218 for _, x := range actions {
219 if a == x {
220 ok = true
221 break
222 }
223 }
224 if !ok {
225 err = fmt.Errorf("unrecognized action %q", v)
226 }
227 case "Status":
228 // todo: parse the enhanced status code?
229 r.Status = v
230 case "Remote-Mta":
231 r.RemoteMTA = NameIP{Name: v}
232 case "Diagnostic-Code":
233 // ../rfc/3464:518
234 t := strings.SplitN(v, ";", 2)
235 dt := strings.TrimSpace(t[0])
236 if strings.ToLower(dt) != "smtp" {
237 err = fmt.Errorf("unknown diagnostic-type %q, expected smtp", dt)
238 } else if len(t) != 2 {
239 err = fmt.Errorf("missing semicolon to separate diagnostic-type from code")
240 } else {
241 r.DiagnosticCode = strings.TrimSpace(t[1])
242 }
243 case "Last-Attempt-Date":
244 r.LastAttemptDate, err = parseDateTime(v)
245 case "Final-Log-Id":
246 r.FinalLogID = v
247 case "Will-Retry-Until":
248 tm, err := parseDateTime(v)
249 if err == nil {
250 r.WillRetryUntil = &tm
251 }
252 default:
253 // todo future: parse localized diagnostic text field?
254 // We'll assume it is an extension field, we'll ignore it for now.
255 }
256 if err != nil {
257 return Recipient{}, fmt.Errorf("parsing field %q %q: %v", k, v, err)
258 }
259 }
260
261 required := []string{"Final-Recipient", "Action", "Status"}
262 for _, req := range required {
263 if _, ok := h[req]; !ok {
264 return Recipient{}, fmt.Errorf("missing required recipient field %q", req)
265 }
266 }
267
268 r.Header = h
269 return r, nil
270}
271
272// ../rfc/3464:525
273func parseMTA(s string, utf8 bool) (string, error) {
274 s = removeComments(s)
275 t := strings.SplitN(s, ";", 2)
276 if len(t) != 2 {
277 return "", fmt.Errorf("missing semicolon that splits type and name")
278 }
279 k := strings.TrimSpace(t[0])
280 if !strings.EqualFold(k, "dns") {
281 return "", fmt.Errorf("unknown type %q, expected dns", k)
282 }
283 return strings.TrimSpace(t[1]), nil
284}
285
286func parseDateTime(s string) (time.Time, error) {
287 s = removeComments(s)
288 return time.Parse(message.RFC5322Z, s)
289}
290
291func parseAddress(s string, utf8 bool) (smtp.Path, error) {
292 s = removeComments(s)
293 t := strings.SplitN(s, ";", 2)
294 // ../rfc/3464:513 ../rfc/6533:250
295 addrType := strings.ToLower(strings.TrimSpace(t[0]))
296 if len(t) != 2 {
297 return smtp.Path{}, fmt.Errorf("missing semicolon that splits address type and address")
298 } else if addrType == "utf-8" {
299 if !utf8 {
300 return smtp.Path{}, fmt.Errorf("utf-8 address type for non-utf-8 dsn")
301 }
302 } else if addrType != "rfc822" {
303 return smtp.Path{}, fmt.Errorf("unrecognized address type %q, expected rfc822", addrType)
304 }
305 s = strings.TrimSpace(t[1])
306 if !utf8 {
307 for _, c := range s {
308 if c > 0x7f {
309 return smtp.Path{}, fmt.Errorf("non-ascii without utf-8 enabled")
310 }
311 }
312 }
313 // todo: more proper parser
314 t = strings.SplitN(s, "@", 2)
315 if len(t) != 2 || t[0] == "" || t[1] == "" {
316 return smtp.Path{}, fmt.Errorf("invalid email address")
317 }
318 d, err := dns.ParseDomain(t[1])
319 if err != nil {
320 return smtp.Path{}, fmt.Errorf("parsing domain: %v", err)
321 }
322 var lp string
323 var esc string
324 for _, c := range t[0] {
325 if esc == "" && c == '\\' || esc == `\` && (c == 'x' || c == 'X') || esc == `\x` && c == '{' {
326 if c == 'X' {
327 c = 'x'
328 }
329 esc += string(c)
330 } else if strings.HasPrefix(esc, `\x{`) {
331 if c == '}' {
332 c, err := strconv.ParseInt(esc[3:], 16, 32)
333 if err != nil {
334 return smtp.Path{}, fmt.Errorf("parsing localpart with hexpoint: %v", err)
335 }
336 lp += string(rune(c))
337 esc = ""
338 } else {
339 esc += string(c)
340 }
341 } else {
342 lp += string(c)
343 }
344 }
345 if esc != "" {
346 return smtp.Path{}, fmt.Errorf("parsing localpart: unfinished embedded unicode char")
347 }
348 p := smtp.Path{Localpart: smtp.Localpart(lp), IPDomain: dns.IPDomain{Domain: d}}
349 return p, nil
350}
351
// removeComments returns s without parenthesized comments. Comments can be
// nested. A closing parenthesis without a matching opening parenthesis is kept
// as regular text. An unclosed comment runs until the end of s.
func removeComments(s string) string {
	kept := make([]rune, 0, len(s))
	depth := 0 // Number of comments currently open.
	for _, c := range s {
		switch {
		case c == '(':
			depth++
		case c == ')' && depth > 0:
			depth--
		case depth == 0:
			kept = append(kept, c)
		}
	}
	return string(kept)
}
366