1package message

3import (

4 "bufio"

5 "fmt"

6 "io"

7 "regexp"

8 "slices"

9 "strings"

11 "golang.org/x/net/html"

12 "golang.org/x/net/html/atom"

14 "github.com/mjl-/mox/mlog"

15 "github.com/mjl-/mox/moxio"

16)

18// Preview returns a message preview, based on the first text/plain or text/html

19// part of the message that has textual content. Preview returns at most 256

20// characters (possibly more bytes). Callers may want to truncate and trim trailing

21// whitespace before using the preview.

22//

23// Preview logs at debug level for invalid messages. An error is only returned for

24// serious errors, like i/o errors.

25func (p Part) Preview(log mlog.Log) (string, error) {

26 // ../rfc/8970:190

28 // Don't use if Content-Disposition attachment.

29 disp, _, err := p.DispositionFilename()

30 if err != nil {

31 log.Debugx("parsing disposition/filename", err)

32 } else if strings.EqualFold(disp, "attachment") {

33 return "", nil

34 }

36 mt := p.MediaType + "/" + p.MediaSubType

37 switch mt {

38 case "TEXT/PLAIN", "/":

39 r := &moxio.LimitReader{R: p.ReaderUTF8OrBinary(), Limit: 100 * 1024}

40 s, err := previewText(r)

41 if err != nil {

42 return "", fmt.Errorf("making preview from text part: %v", err)

43 }

44 return s, nil

46 case "TEXT/HTML":

47 r := &moxio.LimitReader{R: p.ReaderUTF8OrBinary(), Limit: 1024 * 1024}

49 // First turn the HTML into text.

50 s, err := previewHTML(r)

51 if err != nil {

52 log.Debugx("parsing html part for preview (ignored)", err)

53 return "", nil

54 }

56 // Turn text body into a preview text.

57 s, err = previewText(strings.NewReader(s))

58 if err != nil {

59 return "", fmt.Errorf("making preview from text from html: %v", err)

60 }

61 return s, nil

63 case "MULTIPART/ENCRYPTED":

64 return "", nil

65 }

67 for i, sp := range p.Parts {

68 if mt == "MULTIPART/SIGNED" && i >= 1 {

69 break

70 }

71 s, err := sp.Preview(log)

72 if err != nil || s != "" {

73 return s, err

74 }

75 }

76 return "", nil

77}

// previewText returns a line the client can display next to the subject line
// in a mailbox. It will replace quoted text, and any prefixing "On ... wrote:"
// line with "[...]" so only new and useful information will be displayed.
// Trailing signatures are not included.
//
// Each line in the result is trimmed of surrounding whitespace and ends with a
// newline. The result holds at most 256 characters (not bytes).
//
// A line longer than the scanner's token limit (bufio.MaxScanTokenSize, 64 KiB)
// is not an error: it ends the input, and the preview is made from the lines
// before it. Other errors reading from r are returned, along with the text
// gathered so far.
func previewText(r io.Reader) (string, error) {
	// We look quite a few lines ahead, to recognize trailing signatures that are
	// followed by empty lines.
	var lines []string
	scanner := bufio.NewScanner(r)

	// ensureLines tops up the lookahead window to 10 whitespace-trimmed lines, or
	// fewer once the scanner has stopped. The scanner stops unrecoverably at end of
	// input or at an error, so it is not polled again after returning false.
	scanning := true
	ensureLines := func() {
		for scanning && len(lines) < 10 {
			if scanning = scanner.Scan(); scanning {
				lines = append(lines, strings.TrimSpace(scanner.Text()))
			}
		}
	}
	ensureLines()

	// isSnipped reports whether a line is a marker for left-out text.
	isSnipped := func(s string) bool {
		return s == "[...]" || s == "[…]" || s == "..."
	}

	// nextLineQuoted reports whether the line following lines[i], with an optional
	// empty line in between, is quoted text or a marker for left-out text.
	nextLineQuoted := func(i int) bool {
		if i+1 < len(lines) && lines[i+1] == "" {
			i++
		}
		return i+1 < len(lines) && (strings.HasPrefix(lines[i+1], ">") || isSnipped(lines[i+1]))
	}

	// Remainder is a signature if the first line consists of only dashes, at least
	// 2, and fewer than 5 lines follow in the lookahead window (not counting
	// trailing empty lines), none of them empty. Lines are already trimmed, so the
	// common "-- " delimiter shows up as "--".
	isSignature := func() bool {
		if len(lines) == 0 || !strings.HasPrefix(lines[0], "--") || strings.Trim(lines[0], "-") != "" {
			return false
		}
		l := lines[1:]
		for len(l) > 0 && l[len(l)-1] == "" {
			l = l[:len(l)-1]
		}
		if len(l) >= 5 {
			return false
		}
		return !slices.Contains(l, "")
	}

	result := ""

	// resultSnipped reports whether result currently ends with a marker for left-out
	// text. Every line added to result is terminated with a newline, so the markers
	// are matched including that newline.
	resultSnipped := func() bool {
		return strings.HasSuffix(result, "[...]\n") || strings.HasSuffix(result, "[…]\n")
	}

	// Quick check for initial wrapped "On ... wrote:" line.
	if len(lines) > 3 && strings.HasPrefix(lines[0], "On ") && !strings.HasSuffix(lines[0], "wrote:") && strings.HasSuffix(lines[1], ":") && nextLineQuoted(1) {
		result = "[...]\n"
		lines = lines[3:]
		ensureLines()
	}

	for ; len(lines) > 0 && !isSignature(); ensureLines() {
		line := lines[0]
		if strings.HasPrefix(line, ">") {
			// Quoted text, replaced by a single marker.
			if !resultSnipped() {
				result += "[...]\n"
			}
			lines = lines[1:]
			continue
		}
		if line == "" {
			lines = lines[1:]
			continue
		}
		// Check for a "On <date>, <person> wrote:", we require digits before a quoted
		// line, with an optional empty line in between. If we don't have any text yet, we
		// don't require the digits.
		if strings.HasSuffix(line, ":") && (strings.ContainsAny(line, "0123456789") || result == "") && nextLineQuoted(0) {
			if !resultSnipped() {
				result += "[...]\n"
			}
			lines = lines[1:]
			continue
		}
		// Skip possibly duplicate snipping by author.
		if !isSnipped(line) || !resultSnipped() {
			result += line + "\n"
		}
		lines = lines[1:]
		// Stop once we are about to have enough text, the exact limit is applied below.
		if len(result) > 250 {
			break
		}
	}

	// Limit number of characters (not bytes). ../rfc/8970:200
	// To 256 characters. ../rfc/8970:211
	n := 0
	for o := range result {
		n++
		if n > 256 {
			// o is the byte offset of character 257, cut right before it.
			result = result[:o]
			break
		}
	}

	err := scanner.Err()
	if err == bufio.ErrTooLong {
		// An overlong line is unusual content, not a serious (i/o) error. The scanner
		// reports this sentinel as is, so a direct comparison suffices.
		err = nil
	}
	return result, err
}

180

181// Any text inside these html elements (recursively) is ignored.

182var ignoreAtoms = atomMap(

183 atom.Dialog,

184 atom.Head,

185 atom.Map,

186 atom.Math,

187 atom.Script,

188 atom.Style,

189 atom.Svg,

190 atom.Template,

191)

192

193// Inline elements don't force newlines at beginning & end of text in this element.

194// https://developer.mozilla.org/en-US/docs/Web/HTML/Element#inline_text_semantics

195var inlineAtoms = atomMap(

196 atom.A,

197 atom.Abbr,

198 atom.B,

199 atom.Bdi,

200 atom.Bdo,

201 atom.Cite,

202 atom.Code,

203 atom.Data,

204 atom.Dfn,

205 atom.Em,

206 atom.I,

207 atom.Kbd,

208 atom.Mark,

209 atom.Q,

210 atom.Rp,

211 atom.Rt,

212 atom.Ruby,

213 atom.S,

214 atom.Samp,

215 atom.Small,

216 atom.Span,

217 atom.Strong,

218 atom.Sub,

219 atom.Sup,

220 atom.Time,

221 atom.U,

222 atom.Var,

223 atom.Wbr,

224

225 atom.Del,

226 atom.Ins,

227

228 // We treat these specially, inserting a space after them instead of a newline.

229 atom.Td,

230 atom.Th,

231)

232

233func atomMap(l ...atom.Atom) map[atom.Atom]bool {

234 m := map[atom.Atom]bool{}

235 for _, a := range l {

236 m[a] = true

237 }

238 return m

239}

240

var (
	// Runs of spaces and/or tabs. Replaced with single space.
	regexpSpace = regexp.MustCompile(`[ \t]+`)

	// Three or more consecutive newlines. Replaced with single newline.
	regexpNewline = regexp.MustCompile(`\n\n\n+`)

	// Two or more consecutive characters from: no-break space, zero width space,
	// zero width non-joiner, zero width joiner. Removed, combinations don't make
	// sense, generated.
	regexpZeroWidth = regexp.MustCompile("[\u00a0\u200b\u200c\u200d][\u00a0\u200b\u200c\u200d]+")
)

244

245func previewHTML(r io.Reader) (string, error) {

246 // Stack/state, based on elements.

247 var ignores []bool

248 var inlines []bool

249

250 var text string // Collecting text.

251 var err error // Set when walking DOM.

252 var quoteLevel int

253

254 // We'll walk the DOM nodes, keeping track of whether we are ignoring text, and

255 // whether we are in an inline or block element, and building up the text. We stop