MPH domian matcher: Support building & using cache directly (instead of building from geosite.dat when Xray starts) (#5505)

Like https://github.com/XTLS/Xray-core/pull/5488#issuecomment-3710995080
This commit is contained in:
Hossin Asaadi
2026-01-31 16:45:39 +03:30
committed by RPRX
parent afcfdbca70
commit 61e1153157
18 changed files with 988 additions and 161 deletions

View File

@@ -2,6 +2,7 @@ package router
import (
"context"
"io"
"os"
"path/filepath"
"regexp"
@@ -52,7 +53,34 @@ var matcherTypeMap = map[Domain_Type]strmatcher.Type{
}
type DomainMatcher struct {
matchers strmatcher.IndexMatcher
Matchers strmatcher.IndexMatcher
}
func SerializeDomainMatcher(domains []*Domain, w io.Writer) error {
g := strmatcher.NewMphMatcherGroup()
for _, d := range domains {
matcherType, f := matcherTypeMap[d.Type]
if !f {
continue
}
_, err := g.AddPattern(d.Value, matcherType)
if err != nil {
return err
}
}
g.Build()
// serialize
return g.Serialize(w)
}
func NewDomainMatcherFromBuffer(data []byte) (*strmatcher.MphMatcherGroup, error) {
matcher, err := strmatcher.NewMphMatcherGroupFromBuffer(data)
if err != nil {
return nil, err
}
return matcher, nil
}
func NewMphMatcherGroup(domains []*Domain) (*DomainMatcher, error) {
@@ -72,12 +100,12 @@ func NewMphMatcherGroup(domains []*Domain) (*DomainMatcher, error) {
}
g.Build()
return &DomainMatcher{
matchers: g,
Matchers: g,
}, nil
}
func (m *DomainMatcher) ApplyDomain(domain string) bool {
return len(m.matchers.Match(strings.ToLower(domain))) > 0
return len(m.Matchers.Match(strings.ToLower(domain))) > 0
}
// Apply implements Condition.

View File

@@ -0,0 +1,167 @@
package router_test
import (
"bytes"
"os"
"path/filepath"
"testing"
"github.com/stretchr/testify/require"
"github.com/xtls/xray-core/app/router"
"github.com/xtls/xray-core/common/platform/filesystem"
)
func TestDomainMatcherSerialization(t *testing.T) {
domains := []*router.Domain{
{Type: router.Domain_Domain, Value: "google.com"},
{Type: router.Domain_Domain, Value: "v2ray.com"},
{Type: router.Domain_Full, Value: "full.example.com"},
}
var buf bytes.Buffer
if err := router.SerializeDomainMatcher(domains, &buf); err != nil {
t.Fatalf("Serialize failed: %v", err)
}
matcher, err := router.NewDomainMatcherFromBuffer(buf.Bytes())
if err != nil {
t.Fatalf("Deserialize failed: %v", err)
}
dMatcher := &router.DomainMatcher{
Matchers: matcher,
}
testCases := []struct {
Input string
Match bool
}{
{"google.com", true},
{"maps.google.com", true},
{"v2ray.com", true},
{"full.example.com", true},
{"example.com", false},
}
for _, tc := range testCases {
if res := dMatcher.ApplyDomain(tc.Input); res != tc.Match {
t.Errorf("Match(%s) = %v, want %v", tc.Input, res, tc.Match)
}
}
}
func TestGeoSiteSerialization(t *testing.T) {
sites := []*router.GeoSite{
{
CountryCode: "CN",
Domain: []*router.Domain{
{Type: router.Domain_Domain, Value: "baidu.cn"},
{Type: router.Domain_Domain, Value: "qq.com"},
},
},
{
CountryCode: "US",
Domain: []*router.Domain{
{Type: router.Domain_Domain, Value: "google.com"},
{Type: router.Domain_Domain, Value: "facebook.com"},
},
},
}
var buf bytes.Buffer
if err := router.SerializeGeoSiteList(sites, nil, nil, &buf); err != nil {
t.Fatalf("SerializeGeoSiteList failed: %v", err)
}
tmp := t.TempDir()
path := filepath.Join(tmp, "matcher.cache")
f, err := os.Create(path)
require.NoError(t, err)
_, err = f.Write(buf.Bytes())
require.NoError(t, err)
f.Close()
f, err = os.Open(path)
require.NoError(t, err)
defer f.Close()
require.NoError(t, err)
data, _ := filesystem.ReadFile(path)
// cn
gp, err := router.LoadGeoSiteMatcher(bytes.NewReader(data), "CN")
if err != nil {
t.Fatalf("LoadGeoSiteMatcher(CN) failed: %v", err)
}
cnMatcher := &router.DomainMatcher{
Matchers: gp,
}
if !cnMatcher.ApplyDomain("baidu.cn") {
t.Error("CN matcher should match baidu.cn")
}
if cnMatcher.ApplyDomain("google.com") {
t.Error("CN matcher should NOT match google.com")
}
// us
gp, err = router.LoadGeoSiteMatcher(bytes.NewReader(data), "US")
if err != nil {
t.Fatalf("LoadGeoSiteMatcher(US) failed: %v", err)
}
usMatcher := &router.DomainMatcher{
Matchers: gp,
}
if !usMatcher.ApplyDomain("google.com") {
t.Error("US matcher should match google.com")
}
if usMatcher.ApplyDomain("baidu.cn") {
t.Error("US matcher should NOT match baidu.cn")
}
// unknown
_, err = router.LoadGeoSiteMatcher(bytes.NewReader(data), "unknown")
if err == nil {
t.Error("LoadGeoSiteMatcher(unknown) should fail")
}
}
func TestGeoSiteSerializationWithDeps(t *testing.T) {
sites := []*router.GeoSite{
{
CountryCode: "geosite:cn",
Domain: []*router.Domain{
{Type: router.Domain_Domain, Value: "baidu.cn"},
},
},
{
CountryCode: "geosite:google@cn",
Domain: []*router.Domain{
{Type: router.Domain_Domain, Value: "google.cn"},
},
},
{
CountryCode: "rule-1",
Domain: []*router.Domain{
{Type: router.Domain_Domain, Value: "google.com"},
},
},
}
deps := map[string][]string{
"rule-1": {"geosite:cn", "geosite:google@cn"},
}
var buf bytes.Buffer
err := router.SerializeGeoSiteList(sites, deps, nil, &buf)
require.NoError(t, err)
matcher, err := router.LoadGeoSiteMatcher(bytes.NewReader(buf.Bytes()), "rule-1")
require.NoError(t, err)
require.True(t, matcher.Match("google.com") != nil)
require.True(t, matcher.Match("baidu.cn") != nil)
require.True(t, matcher.Match("google.cn") != nil)
}

View File

@@ -7,6 +7,8 @@ import (
"strings"
"github.com/xtls/xray-core/common/errors"
"github.com/xtls/xray-core/common/platform"
"github.com/xtls/xray-core/common/platform/filesystem"
"github.com/xtls/xray-core/features/outbound"
"github.com/xtls/xray-core/features/routing"
)
@@ -105,11 +107,25 @@ func (rr *RoutingRule) BuildCondition() (Condition, error) {
}
if len(rr.Domain) > 0 {
matcher, err := NewMphMatcherGroup(rr.Domain)
if err != nil {
return nil, errors.New("failed to build domain condition with MphDomainMatcher").Base(err)
var matcher *DomainMatcher
var err error
// Check if domain matcher cache is provided via environment
domainMatcherPath := platform.NewEnvFlag(platform.MphCachePath).GetValue(func() string { return "" })
if domainMatcherPath != "" {
matcher, err = GetDomainMatcherWithRuleTag(domainMatcherPath, rr.RuleTag)
if err != nil {
return nil, errors.New("failed to build domain condition from cached MphDomainMatcher").Base(err)
}
errors.LogDebug(context.Background(), "MphDomainMatcher loaded from cache for ", rr.RuleTag, " rule tag)")
} else {
matcher, err = NewMphMatcherGroup(rr.Domain)
if err != nil {
return nil, errors.New("failed to build domain condition with MphDomainMatcher").Base(err)
}
errors.LogDebug(context.Background(), "MphDomainMatcher is enabled for ", len(rr.Domain), " domain rule(s)")
}
errors.LogDebug(context.Background(), "MphDomainMatcher is enabled for ", len(rr.Domain), " domain rule(s)")
conds.Add(matcher)
rr.Domain = nil
runtime.GC()
@@ -172,3 +188,20 @@ func (br *BalancingRule) Build(ohm outbound.Manager, dispatcher routing.Dispatch
return nil, errors.New("unrecognized balancer type")
}
}
func GetDomainMatcherWithRuleTag(domainMatcherPath string, ruleTag string) (*DomainMatcher, error) {
f, err := filesystem.NewFileReader(domainMatcherPath)
if err != nil {
return nil, errors.New("failed to load file: ", domainMatcherPath).Base(err)
}
defer f.Close()
g, err := LoadGeoSiteMatcher(f, ruleTag)
if err != nil {
return nil, errors.New("failed to load file:", domainMatcherPath).Base(err)
}
return &DomainMatcher{
Matchers: g,
}, nil
}

View File

@@ -0,0 +1,100 @@
package router
import (
"encoding/gob"
"errors"
"io"
"runtime"
"github.com/xtls/xray-core/common/strmatcher"
)
type geoSiteListGob struct {
Sites map[string][]byte
Deps map[string][]string
Hosts map[string][]string
}
func SerializeGeoSiteList(sites []*GeoSite, deps map[string][]string, hosts map[string][]string, w io.Writer) error {
data := geoSiteListGob{
Sites: make(map[string][]byte),
Deps: deps,
Hosts: hosts,
}
for _, site := range sites {
if site == nil {
continue
}
var buf bytesWriter
if err := SerializeDomainMatcher(site.Domain, &buf); err != nil {
return err
}
data.Sites[site.CountryCode] = buf.Bytes()
}
return gob.NewEncoder(w).Encode(data)
}
type bytesWriter struct {
data []byte
}
func (w *bytesWriter) Write(p []byte) (n int, err error) {
w.data = append(w.data, p...)
return len(p), nil
}
func (w *bytesWriter) Bytes() []byte {
return w.data
}
func LoadGeoSiteMatcher(r io.Reader, countryCode string) (strmatcher.IndexMatcher, error) {
var data geoSiteListGob
if err := gob.NewDecoder(r).Decode(&data); err != nil {
return nil, err
}
return loadWithDeps(&data, countryCode, make(map[string]bool))
}
func loadWithDeps(data *geoSiteListGob, code string, visited map[string]bool) (strmatcher.IndexMatcher, error) {
if visited[code] {
return nil, errors.New("cyclic dependency")
}
visited[code] = true
var matchers []strmatcher.IndexMatcher
if siteData, ok := data.Sites[code]; ok {
m, err := NewDomainMatcherFromBuffer(siteData)
if err == nil {
matchers = append(matchers, m)
}
}
if deps, ok := data.Deps[code]; ok {
for _, dep := range deps {
m, err := loadWithDeps(data, dep, visited)
if err == nil {
matchers = append(matchers, m)
}
}
}
if len(matchers) == 0 {
return nil, errors.New("matcher not found for: " + code)
}
if len(matchers) == 1 {
return matchers[0], nil
}
runtime.GC()
return &strmatcher.IndexMatcherGroup{Matchers: matchers}, nil
}
func LoadGeoSiteHosts(r io.Reader) (map[string][]string, error) {
var data geoSiteListGob
if err := gob.NewDecoder(r).Decode(&data); err != nil {
return nil, err
}
return data.Hosts, nil
}