1package webmail
2
3import (
4 "errors"
5 "fmt"
6 "io"
7 "log/slog"
8 "mime"
9 "net/textproto"
10 "net/url"
11 "strings"
12
13 "golang.org/x/text/encoding/ianaindex"
14
15 "github.com/mjl-/mox/dns"
16 "github.com/mjl-/mox/message"
17 "github.com/mjl-/mox/mlog"
18 "github.com/mjl-/mox/mox-"
19 "github.com/mjl-/mox/moxio"
20 "github.com/mjl-/mox/smtp"
21 "github.com/mjl-/mox/store"
22 "slices"
23)
24
// todo: we should store all information needed for messageItem in store.Message (perhaps with some data in message.Part) for fast access, so we do not have to parse the on-disk message file.
26
27var wordDecoder = mime.WordDecoder{
28 CharsetReader: func(charset string, r io.Reader) (io.Reader, error) {
29 switch strings.ToLower(charset) {
30 case "", "us-ascii", "utf-8":
31 return r, nil
32 }
33 enc, _ := ianaindex.MIME.Encoding(charset)
34 if enc == nil {
35 enc, _ = ianaindex.IANA.Encoding(charset)
36 }
37 if enc == nil {
38 return r, fmt.Errorf("unknown charset %q", charset)
39 }
40 return enc.NewDecoder().Reader(r), nil
41 },
42}
43
44// Attempt q/b-word-decode name, coming from Content-Type "name" field or
45// Content-Disposition "filename" field.
46//
47// RFC 2231 specify an encoding for non-ascii values in mime header parameters. But
48// it appears common practice to instead just q/b-word encode the values.
49// Thunderbird and gmail.com do this for the Content-Type "name" parameter.
50// gmail.com also does that for the Content-Disposition "filename" parameter, where
51// Thunderbird uses the RFC 2231-defined encoding. Go's mime.ParseMediaType parses
52// the mechanism specified in RFC 2231 only. The value for "name" we get here would
53// already be decoded properly for standards-compliant headers, like
54// "filename*0*=UTF-8”%...; filename*1*=%.... We'll look for Q/B-word encoding
55// markers ("=?"-prefix or "?="-suffix) and try to decode if present. This would
56// only cause trouble for filenames having this prefix/suffix.
57func tryDecodeParam(log mlog.Log, name string) string {
58 if name == "" || !strings.HasPrefix(name, "=?") && !strings.HasSuffix(name, "?=") {
59 return name
60 }
61 // todo: find where this is allowed. it seems quite common. perhaps we should remove the pedantic check?
62 if mox.Pedantic {
63 log.Debug("attachment contains rfc2047 q/b-word-encoded mime parameter instead of rfc2231-encoded", slog.String("name", name))
64 return name
65 }
66 s, err := wordDecoder.DecodeHeader(name)
67 if err != nil {
68 log.Debugx("q/b-word decoding mime parameter", err, slog.String("name", name))
69 return name
70 }
71 return s
72}
73
// todo: mime.FormatMediaType does not wrap long lines. We should wrap them ourselves, splitting the header into several parts (if commonly supported).
75
76func messageItemMoreHeaders(moreHeaders []string, pm ParsedMessage) (l [][2]string) {
77 for _, k := range moreHeaders {
78 k = textproto.CanonicalMIMEHeaderKey(k)
79 for _, v := range pm.Headers[k] {
80 l = append(l, [2]string{k, v})
81 }
82 }
83 return l
84}
85
86func messageItem(log mlog.Log, m store.Message, state *msgState, moreHeaders []string) (MessageItem, error) {
87 headers := len(moreHeaders) > 0
88 pm, err := parsedMessage(log, &m, state, false, true, headers)
89 if err != nil && errors.Is(err, message.ErrHeader) && headers {
90 log.Debugx("load message item without parsing headers after error", err, slog.Int64("msgid", m.ID))
91 pm, err = parsedMessage(log, &m, state, false, true, false)
92 }
93 if err != nil {
94 return MessageItem{}, fmt.Errorf("parsing message %d for item: %v", m.ID, err)
95 }
96 // Clear largish unused data.
97 m.MsgPrefix = nil
98 m.ParsedBuf = nil
99 l := messageItemMoreHeaders(moreHeaders, pm)
100 return MessageItem{m, pm.envelope, pm.attachments, pm.isSigned, pm.isEncrypted, true, l}, nil
101}
102
103func parsedMessage(log mlog.Log, m *store.Message, state *msgState, full, msgitem, msgitemHeaders bool) (pm ParsedMessage, rerr error) {
104 pm.ViewMode = store.ModeText // Valid default, in case this makes it to frontend.
105
106 if full || msgitem || state.newPreviews != nil && m.Preview == nil {
107 if !state.ensurePart(*m, true) {
108 return pm, state.err
109 }
110 if full {
111 pm.Part = *state.part
112 }
113 } else {
114 if !state.ensurePart(*m, false) {
115 return pm, state.err
116 }
117 }
118 if state.newPreviews != nil && m.Preview == nil {
119 s, err := state.part.Preview(log)
120 if err != nil {
121 log.Infox("generating preview", err, slog.Int64("msgid", m.ID))
122 }
123 // Set preview on m now, and let it be saved later on.
124 m.Preview = &s
125 state.newPreviews[m.ID] = s
126 }
127
128 // todo: we should store this form in message.Part, requires a data structure update.
129
130 convertAddrs := func(l []message.Address) []MessageAddress {
131 r := make([]MessageAddress, len(l))
132 for i, a := range l {
133 d, err := dns.ParseDomain(a.Host)
134 log.Check(err, "parsing domain")
135 if err != nil {
136 d = dns.Domain{ASCII: a.Host}
137 }
138 r[i] = MessageAddress{a.Name, a.User, d}
139 }
140 return r
141 }
142
143 if full || msgitem {
144 env := MessageEnvelope{}
145 if state.part.Envelope != nil {
146 e := *state.part.Envelope
147 env.Date = e.Date
148 env.Subject = e.Subject
149 env.InReplyTo = e.InReplyTo
150 env.MessageID = e.MessageID
151 env.From = convertAddrs(e.From)
152 env.Sender = convertAddrs(e.Sender)
153 env.ReplyTo = convertAddrs(e.ReplyTo)
154 env.To = convertAddrs(e.To)
155 env.CC = convertAddrs(e.CC)
156 env.BCC = convertAddrs(e.BCC)
157 }
158 pm.envelope = env
159 }
160
161 if (full || msgitemHeaders) && state.part.BodyOffset > 0 {
162 hdrs, err := state.part.Header()
163 if err != nil {
164 return ParsedMessage{}, fmt.Errorf("parsing headers: %w", err)
165 }
166 pm.Headers = hdrs
167
168 pm.ListReplyAddress = parseListPostAddress(hdrs.Get("List-Post"))
169 } else {
170 pm.Headers = map[string][]string{}
171 }
172
173 pm.Texts = []string{}
174
175 // We track attachments from multipart/mixed differently from other attachments.
176 // The others are often inline, sometimes just some logo's in HTML alternative
177 // messages. We want to have our mixed attachments at the start of the list, but
178 // our descent-first parsing would result in inline messages first in the typical
179 // message.
180 var attachmentsMixed []Attachment
181 var attachmentsOther []Attachment
182
183 addAttachment := func(a Attachment, isMixed bool) {
184 if isMixed {
185 attachmentsMixed = append(attachmentsMixed, a)
186 } else {
187 attachmentsOther = append(attachmentsOther, a)
188 }
189 }
190
191 // todo: how should we handle messages where a user prefers html, and we want to show it, but it's a DSN that also has textual-only parts? e.g. gmail's dsn where the first part is multipart/related with multipart/alternative, and second part is the regular message/delivery-status. we want to display both the html and the text.
192
193 var usePart func(p message.Part, index int, parent *message.Part, path []int, parentMixed bool)
194 usePart = func(p message.Part, index int, parent *message.Part, path []int, parentMixed bool) {
195 mt := p.MediaType + "/" + p.MediaSubType
196 newParentMixed := mt == "MULTIPART/MIXED"
197 for i, sp := range p.Parts {
198 if mt == "MULTIPART/SIGNED" && i >= 1 {
199 continue
200 }
201 usePart(sp, i, &p, append(slices.Clone(path), i), newParentMixed)
202 }
203 switch mt {
204 case "TEXT/PLAIN", "/":
205 // Don't include if Content-Disposition attachment.
206 if full || msgitem {
207 disp, name, err := p.DispositionFilename()
208 if err != nil && errors.Is(err, message.ErrParamEncoding) {
209 log.Debugx("parsing disposition/filename", err)
210 } else if err != nil {
211 rerr = fmt.Errorf("reading disposition/filename: %v", err)
212 return
213 }
214 if strings.EqualFold(disp, "attachment") {
215 addAttachment(Attachment{path, name, p}, parentMixed)
216 return
217 }
218 }
219
220 if full {
221 buf, err := io.ReadAll(&moxio.LimitReader{R: p.ReaderUTF8OrBinary(), Limit: 2 * 1024 * 1024})
222 if err != nil {
223 rerr = fmt.Errorf("reading text part: %v", err)
224 return
225 }
226 pm.Texts = append(pm.Texts, string(buf))
227 pm.TextPaths = append(pm.TextPaths, slices.Clone(path))
228 }
229
230 case "TEXT/HTML":
231 pm.HasHTML = true
232 if full && pm.HTMLPath == nil {
233 pm.HTMLPath = slices.Clone(path)
234 }
235
236 default:
237 // todo: see if there is a common nesting messages that are both signed and encrypted.
238 if parent == nil && mt == "MULTIPART/SIGNED" {
239 pm.isSigned = true
240 }
241 if parent == nil && mt == "MULTIPART/ENCRYPTED" {
242 pm.isEncrypted = true
243 }
244 // todo: possibly do not include anything below multipart/alternative that starts with text/html, they may be cids. perhaps have a separate list of attachments for the text vs html version?
245 if p.MediaType != "MULTIPART" {
246 var parentct string
247 if parent != nil {
248 parentct = parent.MediaType + "/" + parent.MediaSubType
249 }
250
251 // Recognize DSNs.
252 if parentct == "MULTIPART/REPORT" && index == 1 && (mt == "MESSAGE/GLOBAL-DELIVERY-STATUS" || mt == "MESSAGE/DELIVERY-STATUS") {
253 if full {
254 buf, err := io.ReadAll(&moxio.LimitReader{R: p.ReaderUTF8OrBinary(), Limit: 1024 * 1024})
255 if err != nil {
256 rerr = fmt.Errorf("reading text part: %v", err)
257 return
258 }
259 pm.Texts = append(pm.Texts, string(buf))
260 pm.TextPaths = append(pm.TextPaths, slices.Clone(path))
261 }
262 return
263 }
264 if parentct == "MULTIPART/REPORT" && index == 2 && (mt == "MESSAGE/GLOBAL-HEADERS" || mt == "TEXT/RFC822-HEADERS") {
265 if full {
266 buf, err := io.ReadAll(&moxio.LimitReader{R: p.ReaderUTF8OrBinary(), Limit: 1024 * 1024})
267 if err != nil {
268 rerr = fmt.Errorf("reading text part: %v", err)
269 return
270 }
271 pm.Texts = append(pm.Texts, string(buf))
272 pm.TextPaths = append(pm.TextPaths, slices.Clone(path))
273 }
274 return
275 }
276 if parentct == "MULTIPART/REPORT" && index == 2 && (mt == "MESSAGE/GLOBAL" || mt == "TEXT/RFC822") {
277 addAttachment(Attachment{path, "original.eml", p}, parentMixed)
278 return
279 }
280
281 name := tryDecodeParam(log, p.ContentTypeParams["name"])
282 if name == "" && (full || msgitem) {
283 // todo: should have this, and perhaps all content-* headers, preparsed in message.Part?
284 h, err := p.Header()
285 log.Check(err, "parsing attachment headers", slog.Int64("msgid", m.ID))
286 cp := h.Get("Content-Disposition")
287 if cp != "" {
288 _, params, err := mime.ParseMediaType(cp)
289 log.Check(err, "parsing content-disposition", slog.String("cp", cp))
290 name = tryDecodeParam(log, params["filename"])
291 }
292 }
293 addAttachment(Attachment{path, name, p}, parentMixed)
294 }
295 }
296 }
297 usePart(*state.part, -1, nil, []int{}, false)
298
299 pm.attachments = []Attachment{}
300 pm.attachments = append(pm.attachments, attachmentsMixed...)
301 pm.attachments = append(pm.attachments, attachmentsOther...)
302
303 if rerr == nil {
304 pm.ID = m.ID
305 }
306 return
307}
308
309// parses List-Post header, returning an address if it could be found, and nil otherwise.
310func parseListPostAddress(s string) *MessageAddress {
311 /*
312 Examples:
313 List-Post: <mailto:list@host.com>
314 List-Post: <mailto:moderator@host.com> (Postings are Moderated)
315 List-Post: <mailto:moderator@host.com?subject=list%20posting>
316 List-Post: NO (posting not allowed on this list)
317 List-Post: <https://groups.google.com/group/golang-dev/post>, <mailto:golang-dev@googlegroups.com>
318 */
319 s = strings.TrimSpace(s)
320 for s != "" {
321 if !strings.HasPrefix(s, "<") {
322 return nil
323 }
324 addr, ns, found := strings.Cut(s[1:], ">")
325 if !found {
326 return nil
327 }
328 if strings.HasPrefix(addr, "mailto:") {
329 u, err := url.Parse(addr)
330 if err != nil {
331 return nil
332 }
333 addr, err := smtp.ParseAddress(u.Opaque)
334 if err != nil {
335 return nil
336 }
337 return &MessageAddress{User: addr.Localpart.String(), Domain: addr.Domain}
338 }
339 s = strings.TrimSpace(ns)
340 s = strings.TrimPrefix(s, ",")
341 s = strings.TrimSpace(s)
342 }
343 return nil
344}
345