From 2fd7aea3003c5f62c0f67bbb586c5c8a139a09e6 Mon Sep 17 00:00:00 2001 From: Duc-Tam Nguyen Date: Mon, 15 Jun 2026 23:48:29 +0700 Subject: [PATCH] Add kit domain layer for 36Kr Wires the existing kr36 package into the any-cli/kit framework so the binary and a multi-domain host share one source of truth: - domain.go: Domain struct, Register (article resolver + news list op), newClient factory, Classify/Locate URI helpers - domain_test.go: offline tests for DomainInfo, Classify, Locate, host wiring (Mint + ResolveOn round-trip) - types.go: add ID field and kit/table struct tags to Article - kr36.go: add Host constant, populate Article.ID from URL, strip feed-tracking query params from the URL field - go.mod/go.sum: add github.com/tamnd/any-cli v0.2.0 All 9 tests pass (5 existing + 4 new domain tests). --- go.mod | 13 +++- go.sum | 22 ++++++ kr36/domain.go | 160 ++++++++++++++++++++++++++++++++++++++++++++ kr36/domain_test.go | 70 +++++++++++++++++++ kr36/kr36.go | 21 +++++- kr36/types.go | 11 +-- 6 files changed, 289 insertions(+), 8 deletions(-) create mode 100644 kr36/domain.go create mode 100644 kr36/domain_test.go diff --git a/go.mod b/go.mod index f392975..9a3ea91 100644 --- a/go.mod +++ b/go.mod @@ -20,6 +20,8 @@ require ( github.com/clipperhouse/displaywidth v0.4.1 // indirect github.com/clipperhouse/stringish v0.1.1 // indirect github.com/clipperhouse/uax29/v2 v2.3.0 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/lucasb-eyer/go-colorful v1.3.0 // indirect github.com/mattn/go-runewidth v0.0.19 // indirect @@ -28,10 +30,17 @@ require ( github.com/muesli/mango-cobra v1.2.0 // indirect github.com/muesli/mango-pflag v0.1.0 // indirect github.com/muesli/roff v0.1.0 // indirect + github.com/ncruces/go-strftime v1.0.0 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/spf13/pflag v1.0.9 // indirect + github.com/tamnd/any-cli v0.2.0 // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect - golang.org/x/sync v0.17.0 // indirect - golang.org/x/sys v0.37.0 // indirect + golang.org/x/sync v0.20.0 // indirect + golang.org/x/sys v0.42.0 // indirect golang.org/x/text v0.24.0 // indirect + modernc.org/libc v1.72.3 // indirect + modernc.org/mathutil v1.7.1 // indirect + modernc.org/memory v1.11.0 // indirect + modernc.org/sqlite v1.52.0 // indirect ) diff --git a/go.sum b/go.sum index e7d4564..276f727 100644 --- a/go.sum +++ b/go.sum @@ -29,6 +29,10 @@ github.com/clipperhouse/uax29/v2 v2.3.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsV github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag= @@ -47,8 +51,12 @@ github.com/muesli/mango-pflag v0.1.0 h1:UADqbYgpUyRoBja3g6LUL+3LErjpsOwaC9ywvBWe github.com/muesli/mango-pflag v0.1.0/go.mod h1:YEQomTxaCUp8PrbhFh10UfbhbQrM/xJ4i2PB8VTLLW0= github.com/muesli/roff v0.1.0 h1:YD0lalCotmYuF5HhZliKWlIx7IEhiXeSfq7hNjFqGF8= github.com/muesli/roff v0.1.0/go.mod h1:pjAHQM9hdUUwm/krAfrLGgJkXJ+YuhtsfZ42kieB2Ig= +github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= +github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= @@ -58,6 +66,8 @@ github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/tamnd/any-cli v0.2.0 h1:0m4a4ssG9fd6a/n9/5BnNO2WnB3Emt4OwFdY4y7WTEU= +github.com/tamnd/any-cli v0.2.0/go.mod h1:eX/Ak1Ccn1eTBmkFouKtEzg9TG375tUu8zpIrr0GZF8= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= @@ -65,10 +75,22 @@ golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= +golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ= golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= +golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= golang.org/x/text v0.24.0 h1:dd5Bzh4yt5KYA8f9CJHCP4FB4D51c2c6JvN37xJJkJ0= golang.org/x/text v0.24.0/go.mod h1:L8rBsPeo2pSS+xqN0d5u2ikmjtmoJbDBT1b7nHvFCdU= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +modernc.org/libc v1.72.3 h1:ZnDF4tXn4NBXFutMMQC4vtbTFSXhhKzR73fv0beZEAU= +modernc.org/libc v1.72.3/go.mod h1:dn0dZNnnn1clLyvRxLxYExxiKRZIRENOfqQ8XEeg4Qs= +modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= +modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= +modernc.org/memory v1.11.0 h1:o4QC8aMQzmcwCK3t3Ux/ZHmwFPzE6hf2Y5LbkRs+hbI= +modernc.org/memory v1.11.0/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= +modernc.org/sqlite v1.52.0 h1:p4dhYh2tXZCiyaqHwRVJDjIGKWyXayiQpThxgDzJaxo= +modernc.org/sqlite v1.52.0/go.mod h1:tcNzv5p84E0skkmJn038y+hWJbLQXQqEnQfeh5r2JLM= diff --git a/kr36/domain.go b/kr36/domain.go new file mode 100644 index 0000000..13e5a29 --- /dev/null +++ b/kr36/domain.go @@ -0,0 +1,160 @@ +package kr36 + +import ( + "context" + "regexp" + "strings" + + "github.com/tamnd/any-cli/kit" + "github.com/tamnd/any-cli/kit/errs" +) + +// domain.go exposes kr36 as a kit Domain so a multi-domain host can +// blank-import it: +// +// import _ "github.com/tamnd/kr36-cli/kr36" +// +// The same Domain also builds the standalone kr36 binary (cmd/kr36). +func init() { kit.Register(Domain{}) } + +// Domain is the kr36 driver. +type Domain struct{} + +// Info describes the scheme, the hostnames a pasted link is matched against, +// and the identity reused for the binary's help and version. +func (Domain) Info() kit.DomainInfo { + return kit.DomainInfo{ + Scheme: "kr36", + Hosts: []string{Host}, + Identity: kit.Identity{ + Binary: "kr36", + Short: "A command line for 36Kr tech news.", + Long: `A command line for 36Kr tech news. + +kr36 reads public 36Kr data over plain HTTPS, shapes it into clean records, +and prints output that pipes into the rest of your tools. No API key, nothing +to run alongside it.`, + Site: Host, + Repo: "https://github.com/tamnd/kr36-cli", + }, + } +} + +// Register installs the client factory and every operation onto app. +func (Domain) Register(app *kit.App) { + app.SetClient(newClient) + + // article: resolver op so that kit can mint Article records and answer + // `kr36 article ` and `ant get kr36://article/`. + kit.Handle(app, kit.OpMeta{Name: "article", Group: "read", Single: true, + URIType: "article", Resolver: true, + Summary: "Resolve a 36Kr article ID to its URL", + Args: []kit.Arg{{Name: "id", Help: "numeric article ID"}}}, getArticle) + + // news: latest articles from the 36Kr RSS feed. + kit.Handle(app, kit.OpMeta{Name: "news", Group: "read", List: true, + URIType: "article", + Summary: "List the latest 36Kr articles from the RSS feed"}, getNews) +} + +// newClient builds the HTTP client from the kit host config. +func newClient(_ context.Context, cfg kit.Config) (any, error) { + c := DefaultConfig() + if cfg.UserAgent != "" { + c.UserAgent = cfg.UserAgent + } + if cfg.Rate > 0 { + c.Rate = cfg.Rate + } + if cfg.Retries > 0 { + c.Retries = cfg.Retries + } + if cfg.Timeout > 0 { + c.Timeout = cfg.Timeout + } + return NewClient(c), nil +} + +// --- inputs --- + +type articleInput struct { + ID string `kit:"arg" help:"numeric article ID"` + Client *Client `kit:"inject"` +} + +type newsInput struct { + Limit int `kit:"flag,inherit" help:"max results"` + Client *Client `kit:"inject"` +} + +// --- handlers --- + +func getArticle(_ context.Context, in articleInput, emit func(*Article) error) error { + id := strings.TrimSpace(in.ID) + if id == "" { + return errs.Usage("article id is required") + } + return emit(&Article{ + ID: id, + URL: "https://" + Host + "/p/" + id, + }) +} + +func getNews(ctx context.Context, in newsInput, emit func(*Article) error) error { + limit := in.Limit + if limit <= 0 { + limit = 20 + } + articles, err := in.Client.News(ctx, limit) + if err != nil { + return err + } + for i := range articles { + if err := emit(&articles[i]); err != nil { + return err + } + } + return nil +} + +// --- Resolver --- + +// articleURLRE matches a 36kr article URL and captures the numeric ID. +var articleURLRE = regexp.MustCompile(`(?:https?://)?36kr\.com/p/(\d+)`) + +// Classify turns a 36kr URL or numeric article ID into (type, id). +func (Domain) Classify(input string) (uriType, id string, err error) { + input = strings.TrimSpace(input) + if input == "" { + return "", "", errs.Usage("empty 36kr reference") + } + if m := articleURLRE.FindStringSubmatch(input); len(m) >= 2 { + return "article", m[1], nil + } + if isDigits(input) { + return "article", input, nil + } + return "", "", errs.Usage("unrecognized 36kr reference: %q", input) +} + +// Locate is the inverse: the live https URL for a (type, id). +func (Domain) Locate(uriType, id string) (string, error) { + if uriType != "article" { + return "", errs.Usage("kr36 has no resource type %q", uriType) + } + return "https://" + Host + "/p/" + id, nil +} + +// --- helpers --- + +func isDigits(s string) bool { + if s == "" { + return false + } + for _, r := range s { + if r < '0' || r > '9' { + return false + } + } + return true +} diff --git a/kr36/domain_test.go b/kr36/domain_test.go new file mode 100644 index 0000000..d8efea5 --- /dev/null +++ b/kr36/domain_test.go @@ -0,0 +1,70 @@ +package kr36 + +import ( + "testing" + + "github.com/tamnd/any-cli/kit" +) + +// These tests are offline: they exercise the URI driver's pure string functions +// and the host wiring (mint, resolve), which need no network. The client's +// HTTP behaviour is covered in kr36_test.go. + +func TestDomainInfo(t *testing.T) { + info := Domain{}.Info() + if info.Scheme != "kr36" { + t.Errorf("Scheme = %q, want kr36", info.Scheme) + } + if len(info.Hosts) == 0 || info.Hosts[0] != Host { + t.Errorf("Hosts = %v, want [%s]", info.Hosts, Host) + } + if info.Identity.Binary != "kr36" { + t.Errorf("Identity.Binary = %q, want kr36", info.Identity.Binary) + } +} + +func TestClassify(t *testing.T) { + cases := []struct{ in, typ, id string }{ + {"1234567890", "article", "1234567890"}, + {"https://36kr.com/p/1234567890", "article", "1234567890"}, + {"https://36kr.com/p/1234567890?f=rss", "article", "1234567890"}, + } + for _, tc := range cases { + typ, id, err := Domain{}.Classify(tc.in) + if err != nil || typ != tc.typ || id != tc.id { + t.Errorf("Classify(%q) = (%q, %q, %v), want (%q, %q, nil)", + tc.in, typ, id, err, tc.typ, tc.id) + } + } +} + +func TestLocate(t *testing.T) { + got, err := Domain{}.Locate("article", "1234567890") + want := "https://36kr.com/p/1234567890" + if err != nil || got != want { + t.Errorf("Locate = (%q, %v), want (%q, nil)", got, err, want) + } +} + +// TestHostWiring mounts the driver in a kit Host and checks the round trip: +// a record mints to its URI and a bare id resolves back to the same URI. +func TestHostWiring(t *testing.T) { + h, err := kit.Open() + if err != nil { + t.Fatal(err) + } + + a := &Article{Rank: 1, ID: "1234567890", Title: "Test", URL: "https://36kr.com/p/1234567890"} + u, err := h.Mint(a) + if err != nil { + t.Fatalf("Mint: %v", err) + } + if want := "kr36://article/1234567890"; u.String() != want { + t.Errorf("Mint = %q, want %q", u.String(), want) + } + + got, err := h.ResolveOn("kr36", "1234567890") + if err != nil || got.String() != "kr36://article/1234567890" { + t.Errorf("ResolveOn = (%q, %v), want kr36://article/1234567890", got.String(), err) + } +} diff --git a/kr36/kr36.go b/kr36/kr36.go index ecf45a4..ee3185e 100644 --- a/kr36/kr36.go +++ b/kr36/kr36.go @@ -21,6 +21,9 @@ import ( // DefaultUserAgent identifies the client to 36kr. const DefaultUserAgent = "Mozilla/5.0 (compatible; kr36/dev; +https://github.com/tamnd/kr36-cli)" +// Host is the canonical site hostname. +const Host = "36kr.com" + // Config holds constructor parameters. type Config struct { BaseURL string @@ -60,6 +63,10 @@ func NewClient(cfg Config) *Client { // htmlTagRe strips HTML tags from description fields. var htmlTagRe = regexp.MustCompile(`<[^>]+>`) +// articleIDRe extracts the numeric ID from a 36kr article URL like +// https://36kr.com/p/1234567890?f=rss +var articleIDRe = regexp.MustCompile(`/p/(\d+)`) + // stripHTML removes HTML tags and collapses whitespace. func stripHTML(s string) string { s = htmlTagRe.ReplaceAllString(s, " ") @@ -103,12 +110,24 @@ func (c *Client) News(ctx context.Context, limit int) ([]Article, error) { out := make([]Article, 0, len(items)) for i, it := range items { + link := strings.TrimSpace(it.Link) + // Extract the numeric article ID from the URL. + id := link + if m := articleIDRe.FindStringSubmatch(link); len(m) >= 2 { + id = m[1] + } + // Strip feed-tracking query parameters from the URL. + cleanURL := link + if idx := strings.IndexByte(cleanURL, '?'); idx >= 0 { + cleanURL = cleanURL[:idx] + } out = append(out, Article{ Rank: i + 1, + ID: id, Title: strings.TrimSpace(it.Title), Summary: stripHTML(it.Description), PubDate: parsePubDate(it.PubDate), - URL: strings.TrimSpace(it.Link), + URL: cleanURL, }) } return out, nil diff --git a/kr36/types.go b/kr36/types.go index e750e03..f08b9f8 100644 --- a/kr36/types.go +++ b/kr36/types.go @@ -17,9 +17,10 @@ type rssItem struct { // Article is the public record type returned by Client.News. type Article struct { - Rank int `json:"rank"` - Title string `json:"title"` - Summary string `json:"summary"` - PubDate string `json:"pub_date"` - URL string `json:"url"` + Rank int `json:"rank" table:"rank"` + ID string `json:"id" kit:"id" table:"id"` + Title string `json:"title" table:"title"` + Summary string `json:"summary,omitempty" table:"-"` + PubDate string `json:"pub_date,omitempty" table:"pub_date"` + URL string `json:"url" table:"url,url"` }