// Package publicsuffix implements a public suffix list to look up the
// organizational domain for a given host name. Organizational domains can be
// registered, one level below a public suffix.
//
// Example.com has a public suffix ".com", and example.co.uk has a public
// suffix ".co.uk". The organizational domain of sub.example.com is
// example.com, and the organizational domain of sub.example.co.uk is
// example.co.uk.
9package publicsuffix
10
11import (
12 "bufio"
13 "bytes"
14 "context"
15 "fmt"
16 "io"
17 "log/slog"
18 "strings"
19
20 _ "embed"
21
22 "golang.org/x/net/idna"
23
24 "github.com/mjl-/mox/dns"
25 "github.com/mjl-/mox/mlog"
26)
27
28// todo: automatically fetch new lists periodically? compare it with the old one. refuse it if it changed too much, especially if it contains far fewer entries than before.
29
// labels is a tree of domain labels, keyed by utf-8 label. The value for a
// label holds the labels one level further down, i.e. the labels to the left of
// it in a rule. The wildcard label of a rule is stored under the key "*".
// The end of a rule is marked with an empty string as label.
type labels map[string]labels
33
34// List is a public suffix list.
35type List struct {
36 includes, excludes labels
37}
38
// publicsuffixList is the builtin list used by the package-level Lookup. It is
// set during package initialization by parsing publicsuffixData.
var publicsuffixList List

// publicsuffixData holds the contents of the file public_suffix_list.txt,
// embedded into the binary at build time.
//
//go:embed public_suffix_list.txt
var publicsuffixData []byte
43
44func init() {
45 log := mlog.New("publicsuffix", nil)
46 l, err := ParseList(log.Logger, bytes.NewReader(publicsuffixData))
47 if err != nil {
48 log.Fatalx("parsing public suffix list", err)
49 }
50 publicsuffixList = l
51}
52
53// ParseList parses a public suffix list.
54// Only the "ICANN DOMAINS" are used.
55func ParseList(elog *slog.Logger, r io.Reader) (List, error) {
56 log := mlog.New("publicsuffix", elog)
57
58 list := List{labels{}, labels{}}
59 br := bufio.NewReader(r)
60
61 // Only use ICANN domains. ../rfc/7489-eid6729
62 var icannDomains bool
63 for {
64 line, err := br.ReadString('\n')
65 if line != "" {
66 line = strings.TrimSpace(line)
67 if strings.HasPrefix(line, "// ===BEGIN ICANN DOMAINS===") {
68 icannDomains = true
69 continue
70 } else if strings.HasPrefix(line, "// ===END ICANN DOMAINS===") {
71 icannDomains = false
72 continue
73 } else if line == "" || strings.HasPrefix(line, "//") || !icannDomains {
74 continue
75 }
76 l := list.includes
77 var t []string
78 oline := line
79 if strings.HasPrefix(line, "!") {
80 line = line[1:]
81 l = list.excludes
82 t = strings.Split(line, ".")
83 if len(t) == 1 {
84 log.Print("exclude rule with single label, skipping", slog.String("line", oline))
85 continue
86 }
87 } else {
88 t = strings.Split(line, ".")
89 }
90 for i := len(t) - 1; i >= 0; i-- {
91 w := t[i]
92 if w == "" {
93 log.Print("empty label in rule, skipping", slog.String("line", oline))
94 break
95 }
96 if w != "" && w != "*" {
97 w, err = idna.Lookup.ToUnicode(w)
98 if err != nil {
99 log.Printx("invalid label, skipping", err, slog.String("line", oline))
100 }
101 }
102 m, ok := l[w]
103 if ok {
104 if _, dup := m[""]; i == 0 && dup {
105 log.Print("duplicate rule", slog.String("line", oline))
106 }
107 l = m
108 } else {
109 m = labels{}
110 l[w] = m
111 l = m
112 }
113 }
114 l[""] = nil // Mark end.
115 }
116 if err == io.EOF {
117 break
118 }
119 if err != nil {
120 return List{}, fmt.Errorf("reading public suffix list: %w", err)
121 }
122 }
123 return list, nil
124}
125
// Lookup calls Lookup on the builtin public suffix list, from
// https://publicsuffix.org/list/.
//
// The builtin list is the one parsed from the embedded public_suffix_list.txt
// during package initialization. See List.Lookup for the returned value.
func Lookup(ctx context.Context, elog *slog.Logger, domain dns.Domain) (orgDomain dns.Domain) {
	return publicsuffixList.Lookup(ctx, elog, domain)
}
131
132// Lookup returns the organizational domain. If domain is an organizational
133// domain, or higher-level, the same domain is returned.
134func (l List) Lookup(ctx context.Context, elog *slog.Logger, domain dns.Domain) (orgDomain dns.Domain) {
135 log := mlog.New("publicsuffix", elog)
136 defer func() {
137 log.Debug("publicsuffix lookup result", slog.Any("reqdom", domain), slog.Any("orgdom", orgDomain))
138 }()
139
140 t := strings.Split(domain.Name(), ".")
141
142 var n int
143 if nexcl, ok := match(l.excludes, t); ok {
144 n = nexcl
145 } else if nincl, ok := match(l.includes, t); ok {
146 n = nincl + 1
147 } else {
148 n = 2
149 }
150 if len(t) < n {
151 return domain
152 }
153 name := strings.Join(t[len(t)-n:], ".")
154 if isASCII(name) {
155 return dns.Domain{ASCII: name}
156 }
157 t = strings.Split(domain.ASCII, ".")
158 ascii := strings.Join(t[len(t)-n:], ".")
159 return dns.Domain{ASCII: ascii, Unicode: name}
160}
161
// isASCII returns whether s consists of ASCII characters only. That is the case
// for the empty string.
func isASCII(s string) bool {
	// Non-ASCII characters, and invalid utf-8, always have a byte with the high
	// bit set, so checking the individual bytes is sufficient.
	for i := 0; i < len(s); i++ {
		if s[i] >= 0x80 {
			return false
		}
	}
	return true
}
170
171func match(l labels, t []string) (int, bool) {
172 if len(t) == 0 {
173 _, ok := l[""]
174 return 0, ok
175 }
176 s := t[len(t)-1]
177 t = t[:len(t)-1]
178 n := 0
179 if m, mok := l[s]; mok {
180 if nn, sok := match(m, t); sok {
181 n = 1 + nn
182 }
183 }
184 if m, mok := l["*"]; mok {
185 if nn, sok := match(m, t); sok && nn >= n {
186 n = 1 + nn
187 }
188 }
189 _, mok := l[""]
190 return n, n > 0 || mok
191}
192