1// Package publicsuffix implements a public suffix list to look up the
2// organizational domain for a given host name. Organizational domains can be
3// registered, one level below a top-level domain.
5// Example.com has a public suffix ".com", and example.co.uk has a public
6// suffix ".co.uk". The organizational domain of sub.example.com is
7// example.com, and the organizational domain of sub.example.co.uk is
22 "golang.org/x/net/idna"
24 "github.com/mjl-/mox/dns"
25 "github.com/mjl-/mox/mlog"
28// todo: automatically fetch a new list periodically? Compare it with the old one, and refuse it if it changed too much, especially if it contains far fewer entries than before.
30// labels maps a utf8 label to the labels tree for its subdomains.
31// The end of a rule is marked with an empty string as label (the value stored
// under that empty key is nil).
32type labels map[string]labels
34// List is a public suffix list.
//
// It holds two label trees, includes and excludes. Lookup first tries to match
// a domain against excludes and only then against includes.
36 includes, excludes labels
// publicsuffixList is the builtin list that the package-level Lookup consults.
39var publicsuffixList List
// publicsuffixData holds the contents of public_suffix_list.txt, embedded into
// the binary at build time through the go:embed directive below.
41//go:embed public_suffix_list.txt
42var publicsuffixData []byte
// The embedded data is parsed with ParseList; a parse error is reported
// through log.Fatalx.
// NOTE(review): the enclosing function header and the assignment of the parsed
// list are not visible in this excerpt - presumably this runs at package
// initialization and stores the result in publicsuffixList; confirm.
45 log := mlog.New("publicsuffix", nil)
46 l, err := ParseList(log.Logger, bytes.NewReader(publicsuffixData))
48 log.Fatalx("parsing public suffix list", err)
53// ParseList parses a public suffix list.
54// Only the "ICANN DOMAINS" are used.
//
// The list is read from r one line at a time. The "===BEGIN ICANN DOMAINS==="
// and "===END ICANN DOMAINS===" comment lines delimit the section of interest.
// A rule starting with "!" is an exclude rule. Rules are split into labels on
// "." and labels other than "*" are converted to unicode with
// idna.Lookup.ToUnicode. Problematic rules (exclude rule with a single label,
// empty label, invalid label) are logged with a "skipping" message, and
// duplicate rules are logged as well. Messages are logged through elog.
//
// If reading fails, an empty List is returned along with an error wrapping the
// read error.
55func ParseList(elog *slog.Logger, r io.Reader) (List, error) {
56 log := mlog.New("publicsuffix", elog)
 // Both label trees of the list start out empty.
58 list := List{labels{}, labels{}}
59 br := bufio.NewReader(r)
 // Read the next line, up to and including the newline.
64 line, err := br.ReadString('\n')
 // Drop surrounding whitespace, including the trailing newline.
66 line = strings.TrimSpace(line)
 // Section markers, empty lines, comment lines and lines outside the ICANN
 // section are recognized here.
 // NOTE(review): the branch bodies are not visible in this excerpt - presumably
 // they toggle icannDomains and continue with the next line; confirm.
67 if strings.HasPrefix(line, "// ===BEGIN ICANN DOMAINS===") {
70 } else if strings.HasPrefix(line, "// ===END ICANN DOMAINS===") {
73 } else if line == "" || strings.HasPrefix(line, "//") || !icannDomains {
 // A leading "!" marks an exclude rule.
79 if strings.HasPrefix(line, "!") {
82 t = strings.Split(line, ".")
84 log.Print("exclude rule with single label, skipping", slog.String("line", oline))
88 t = strings.Split(line, ".")
 // Walk the labels from the last one (the top-level label) to the first.
90 for i := len(t) - 1; i >= 0; i-- {
93 log.Print("empty label in rule, skipping", slog.String("line", oline))
 // The wildcard label "*" is kept as is, other labels are converted to
 // their unicode form.
96 if w != "" && w != "*" {
97 w, err = idna.Lookup.ToUnicode(w)
99 log.Printx("invalid label, skipping", err, slog.String("line", oline))
 // At the first label of the rule (i == 0), an already present end marker
 // means the same rule was seen before.
104 if _, dup := m[""]; i == 0 && dup {
105 log.Print("duplicate rule", slog.String("line", oline))
114 l[""] = nil // Mark end.
 // NOTE(review): the condition guarding this return is not visible here -
 // presumably a read error other than end of input; confirm.
120 return List{}, fmt.Errorf("reading public suffix list: %w", err)
126// Lookup calls Lookup on the builtin public suffix list, from
127// https://publicsuffix.org/list/.
//
// ctx, elog and domain are passed on unchanged to publicsuffixList.Lookup, and
// its result is returned as orgDomain.
128func Lookup(ctx context.Context, elog *slog.Logger, domain dns.Domain) (orgDomain dns.Domain) {
129 return publicsuffixList.Lookup(ctx, elog, domain)
132// Lookup returns the organizational domain. If domain is an organizational
133// domain, or higher-level, the same domain is returned.
//
// The name of domain is split into labels and matched against the excludes of
// the list first, and against the includes only if no exclude matched. The
// result consists of the last n labels of the name. The requested domain and
// the result are logged at debug level through elog.
// NOTE(review): how n is derived from the match results is not visible in this
// excerpt; confirm against the full function.
134func (l List) Lookup(ctx context.Context, elog *slog.Logger, domain dns.Domain) (orgDomain dns.Domain) {
135 log := mlog.New("publicsuffix", elog)
 // NOTE(review): presumably inside a deferred function, so that the named
 // result orgDomain is set when logging - surrounding lines not visible.
137 log.Debug("publicsuffix lookup result", slog.Any("reqdom", domain), slog.Any("orgdom", orgDomain))
 // Split the name into labels.
 // NOTE(review): presumably domain.Name() is the unicode form when there is
 // one - confirm in package dns.
140 t := strings.Split(domain.Name(), ".")
 // Exclude rules are tried before include rules.
143 if nexcl, ok := match(l.excludes, t); ok {
145 } else if nincl, ok := match(l.includes, t); ok {
 // Keep only the last n labels.
153 name := strings.Join(t[len(t)-n:], ".")
 // NOTE(review): the condition choosing between the two returns below is not
 // visible - presumably whether name is ASCII-only; confirm.
155 return dns.Domain{ASCII: name}
 // Take the same number of trailing labels from the ASCII form of the domain,
 // and return it together with name as the unicode form.
157 t = strings.Split(domain.ASCII, ".")
158 ascii := strings.Join(t[len(t)-n:], ".")
159 return dns.Domain{ASCII: ascii, Unicode: name}
// isASCII iterates over the runes of s and returns a bool.
// NOTE(review): the loop body and the return statements are not visible in this
// excerpt - presumably it reports whether s consists of ASCII characters only;
// confirm.
162func isASCII(s string) bool {
163 for _, c := range s {
// match recursively matches the labels in t against the label tree l. At each
// level both the entry for the literal label s and the wildcard entry "*" are
// tried. A wildcard result is used when it is at least as long as the result
// so far (nn >= n).
//
// It returns a count n and whether there was a match. A match is reported when
// n > 0 or when mok is set.
// NOTE(review): s, n and the mok used in the return are set in lines not
// visible in this excerpt - presumably s is the last label of t, n counts the
// matched labels, and mok tells whether l has the end marker ""; confirm.
171func match(l labels, t []string) (int, bool) {
 // Try the subtree for the literal label.
179 if m, mok := l[s]; mok {
180 if nn, sok := match(m, t); sok {
 // Try the subtree for the wildcard label.
184 if m, mok := l["*"]; mok {
185 if nn, sok := match(m, t); sok && nn >= n {
190 return n, n > 0 || mok