From d61febc57c3d59b159dc8dffdbd1657e7bb08494 Mon Sep 17 00:00:00 2001
From: Duc-Tam Nguyen
Date: Tue, 16 Jun 2026 09:39:06 +0700
Subject: [PATCH] add 36kr news command: fetch latest articles via RSS feed
- Article.Published (json: published) replaces PubDate to match spec
- Wire types expanded: rssChannel with XMLName, GUID field on rssItem
- Test suite extended to 10 cases covering empty feed, context cancel,
bad XML, limit-zero, ranks, retry-on-503, HTML stripping, user-agent
---
cli/cmd_news.go | 2 +-
kr36/kr36.go | 10 +-
kr36/kr36_test.go | 290 ++++++++++++++++++++++++++++------------------
kr36/types.go | 27 +++--
4 files changed, 204 insertions(+), 125 deletions(-)
diff --git a/cli/cmd_news.go b/cli/cmd_news.go
index a7906a0..9ef2feb 100644
--- a/cli/cmd_news.go
+++ b/cli/cmd_news.go
@@ -8,7 +8,7 @@ import (
func (a *App) newsCmd() *cobra.Command {
return &cobra.Command{
Use: "news",
- Short: "List the latest 36kr articles",
+ Short: "List the latest articles from 36kr",
RunE: func(cmd *cobra.Command, _ []string) error {
n := a.effectiveLimit(20)
a.progressf("fetching %d articles from 36kr RSS...", n)
diff --git a/kr36/kr36.go b/kr36/kr36.go
index ecf45a4..85e6461 100644
--- a/kr36/kr36.go
+++ b/kr36/kr36.go
@@ -104,11 +104,11 @@ func (c *Client) News(ctx context.Context, limit int) ([]Article, error) {
out := make([]Article, 0, len(items))
for i, it := range items {
out = append(out, Article{
- Rank: i + 1,
- Title: strings.TrimSpace(it.Title),
- Summary: stripHTML(it.Description),
- PubDate: parsePubDate(it.PubDate),
- URL: strings.TrimSpace(it.Link),
+ Rank: i + 1,
+ Title: strings.TrimSpace(it.Title),
+ Summary: stripHTML(it.Description),
+ Published: parsePubDate(it.PubDate),
+ URL: strings.TrimSpace(it.Link),
})
}
return out, nil
diff --git a/kr36/kr36_test.go b/kr36/kr36_test.go
index 39facd9..0af4644 100644
--- a/kr36/kr36_test.go
+++ b/kr36/kr36_test.go
@@ -4,6 +4,8 @@ import (
"context"
"net/http"
"net/http/httptest"
+ "strings"
+ "sync/atomic"
"testing"
"time"
@@ -12,167 +14,233 @@ import (
const mockRSS = `
-
-36氪
-http://36kr.com
--
-曼联,要被卖了
-
-2026-06-13 16:37:21 +0800
-曼联俱乐部正式宣布出售,估值超过50亿英镑。
]]>
-
--
-OpenAI再融资
-
-2026-06-12 10:00:00 +0800
-
-
--
-字节跳动出海
-
-2026-06-11 09:00:00 +0800
-全球化布局。]]>
-
--
-华为最新发布
-
-2026-06-10 08:00:00 +0800
-
-
--
-比亚迪销量新高
-
-2026-06-09 07:00:00 +0800
-
-
-
+
+ 36Kr
+ 36氪最新文章
+ -
+ OpenAI raises $6.6 billion
+
+ https://36kr.com/p/1234567890
+ 2024-03-15 10:30:00 +0800
+ OpenAI announced a record funding round today.]]>
+
+ -
+ BYD surpasses Tesla in Q1 deliveries
+
+ https://36kr.com/p/9876543210
+ 2024-03-14 08:00:00 +0800
+ BYD delivered 300,000 vehicles in Q1 2024.]]>
+
+ -
+ Third article title
+
+ https://36kr.com/p/1111111111
+ 2024-03-13 12:00:00 +0800
+
+
+
+`
+
+const emptyRSS = `
+
+
+ 36Kr
+ 36氪最新文章
+
`
func newTestClient(ts *httptest.Server) *kr36.Client {
- cfg := kr36.DefaultConfig()
- cfg.BaseURL = ts.URL
- cfg.Rate = 0
- return kr36.NewClient(cfg)
+ return kr36.NewClient(kr36.Config{
+ BaseURL: ts.URL,
+ UserAgent: "test-agent/1.0",
+ Rate: 0,
+ Timeout: 5 * time.Second,
+ Retries: 0,
+ })
}
-func TestNewsSendsUserAgent(t *testing.T) {
- srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- if r.Header.Get("User-Agent") == "" {
- t.Error("request carried no User-Agent")
- }
+func TestNewsParsesItems(t *testing.T) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/rss+xml")
_, _ = w.Write([]byte(mockRSS))
}))
- defer srv.Close()
-
- c := newTestClient(srv)
- _, err := c.News(context.Background(), 5)
+ defer ts.Close()
+ c := newTestClient(ts)
+ articles, err := c.News(context.Background(), 0)
if err != nil {
t.Fatal(err)
}
+ if len(articles) != 3 {
+ t.Fatalf("want 3 articles, got %d", len(articles))
+ }
+ a := articles[0]
+ if a.Rank != 1 {
+ t.Errorf("rank: want 1, got %d", a.Rank)
+ }
+ if a.Title != "OpenAI raises $6.6 billion" {
+ t.Errorf("title: got %q", a.Title)
+ }
+ if a.URL != "https://36kr.com/p/1234567890" {
+ t.Errorf("url: got %q", a.URL)
+ }
+ if a.Published != "2024-03-15" {
+ t.Errorf("published: got %q, want 2024-03-15", a.Published)
+ }
}
-func TestNewsParsesItems(t *testing.T) {
- srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+func TestNewsLimitRespected(t *testing.T) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte(mockRSS))
}))
- defer srv.Close()
-
- c := newTestClient(srv)
- arts, err := c.News(context.Background(), 0)
+ defer ts.Close()
+ c := newTestClient(ts)
+ articles, err := c.News(context.Background(), 2)
if err != nil {
t.Fatal(err)
}
- if len(arts) != 5 {
- t.Fatalf("got %d articles, want 5", len(arts))
+ if len(articles) != 2 {
+ t.Fatalf("want 2 articles, got %d", len(articles))
}
-
- a := arts[0]
- if a.Rank != 1 {
- t.Errorf("rank = %d, want 1", a.Rank)
+ if articles[1].Rank != 2 {
+ t.Errorf("articles[1].Rank = %d, want 2", articles[1].Rank)
}
- if a.Title != "曼联,要被卖了" {
- t.Errorf("title = %q", a.Title)
+}
+
+func TestNewsHTMLStripped(t *testing.T) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write([]byte(mockRSS))
+ }))
+ defer ts.Close()
+ c := newTestClient(ts)
+ articles, err := c.News(context.Background(), 1)
+ if err != nil {
+ t.Fatal(err)
}
- if a.URL != "https://36kr.com/p/3200000001" {
- t.Errorf("url = %q", a.URL)
+ summary := articles[0].Summary
+ if strings.Contains(summary, "<") || strings.Contains(summary, ">") {
+ t.Errorf("HTML not stripped from summary: %q", summary)
}
- if a.PubDate != "2026-06-13" {
- t.Errorf("pub_date = %q, want 2026-06-13", a.PubDate)
+ if !strings.Contains(summary, "OpenAI") {
+ t.Errorf("expected text content in summary, got: %q", summary)
}
}
-func TestNewsLimitRespected(t *testing.T) {
- srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+func TestNewsSendsUserAgent(t *testing.T) {
+ var gotUA string
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ gotUA = r.Header.Get("User-Agent")
_, _ = w.Write([]byte(mockRSS))
}))
- defer srv.Close()
-
- c := newTestClient(srv)
- arts, err := c.News(context.Background(), 3)
+ defer ts.Close()
+ c := newTestClient(ts)
+ _, err := c.News(context.Background(), 0)
if err != nil {
t.Fatal(err)
}
- if len(arts) != 3 {
- t.Fatalf("got %d articles, want 3", len(arts))
+ if gotUA == "" {
+ t.Error("User-Agent header not sent")
}
}
func TestNewsRetriesOn503(t *testing.T) {
- var hits int
- srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- hits++
- if hits < 3 {
- w.WriteHeader(http.StatusServiceUnavailable)
+ var calls int32
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ n := atomic.AddInt32(&calls, 1)
+ if n < 3 {
+ w.WriteHeader(503)
return
}
_, _ = w.Write([]byte(mockRSS))
}))
- defer srv.Close()
-
- cfg := kr36.DefaultConfig()
- cfg.BaseURL = srv.URL
- cfg.Rate = 0
- cfg.Retries = 5
- c := kr36.NewClient(cfg)
+ defer ts.Close()
+ c := kr36.NewClient(kr36.Config{
+ BaseURL: ts.URL, UserAgent: "test", Rate: 0, Timeout: 5 * time.Second, Retries: 3,
+ })
+ articles, err := c.News(context.Background(), 0)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if len(articles) == 0 {
+ t.Error("expected articles after retries")
+ }
+ if atomic.LoadInt32(&calls) < 3 {
+ t.Errorf("expected at least 3 calls, got %d", calls)
+ }
+}
- start := time.Now()
- _, err := c.News(context.Background(), 5)
+func TestNewsRanksAssigned(t *testing.T) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write([]byte(mockRSS))
+ }))
+ defer ts.Close()
+ c := newTestClient(ts)
+ articles, err := c.News(context.Background(), 0)
if err != nil {
t.Fatal(err)
}
- if hits != 3 {
- t.Errorf("server saw %d hits, want 3", hits)
+ for i, a := range articles {
+ if a.Rank != i+1 {
+ t.Errorf("articles[%d].Rank = %d, want %d", i, a.Rank, i+1)
+ }
}
- if time.Since(start) < 500*time.Millisecond {
- t.Error("retries did not back off")
+}
+
+func TestNewsEmptyFeed(t *testing.T) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write([]byte(emptyRSS))
+ }))
+ defer ts.Close()
+ c := newTestClient(ts)
+ articles, err := c.News(context.Background(), 0)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if len(articles) != 0 {
+ t.Fatalf("want 0 articles, got %d", len(articles))
}
}
-func TestNewsHTMLStripped(t *testing.T) {
- body := `
-36氪http://36kr.com
--
-Test
-
-2026-06-14 10:00:00 +0800
-bold text]]>
-
-`
+func TestNewsContextCancelled(t *testing.T) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ time.Sleep(10 * time.Millisecond)
+ _, _ = w.Write([]byte(mockRSS))
+ }))
+ defer ts.Close()
+ ctx, cancel := context.WithCancel(context.Background())
+ cancel() // cancel immediately
+ c := newTestClient(ts)
+ _, err := c.News(ctx, 0)
+ if err == nil {
+ t.Fatal("expected error when context is cancelled, got nil")
+ }
+}
- srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
- _, _ = w.Write([]byte(body))
+func TestNewsBadXML(t *testing.T) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write([]byte(`{"not":"xml"}`))
}))
- defer srv.Close()
+ defer ts.Close()
+ c := newTestClient(ts)
+ _, err := c.News(context.Background(), 0)
+ if err == nil {
+ t.Fatal("expected error for bad XML, got nil")
+ }
+ if !strings.Contains(err.Error(), "parse") {
+ t.Errorf("error should mention parse failure, got: %v", err)
+ }
+}
- c := newTestClient(srv)
- arts, err := c.News(context.Background(), 1)
+func TestNewsLimitZero(t *testing.T) {
+ ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ _, _ = w.Write([]byte(mockRSS))
+ }))
+ defer ts.Close()
+ c := newTestClient(ts)
+ articles, err := c.News(context.Background(), 0)
if err != nil {
t.Fatal(err)
}
- if len(arts) != 1 {
- t.Fatalf("got %d articles, want 1", len(arts))
- }
- if arts[0].Summary != "bold text" {
- t.Errorf("summary = %q, want %q", arts[0].Summary, "bold text")
+ if len(articles) != 3 {
+ t.Fatalf("limit 0 should return all 3 articles, got %d", len(articles))
}
}
diff --git a/kr36/types.go b/kr36/types.go
index e750e03..21d5eff 100644
--- a/kr36/types.go
+++ b/kr36/types.go
@@ -1,25 +1,36 @@
package kr36
+import "encoding/xml"
+
// rssRoot is the top-level RSS envelope.
type rssRoot struct {
- Channel struct {
- Items []rssItem `xml:"item"`
- } `xml:"channel"`
+ XMLName xml.Name `xml:"rss"`
+ Channel rssChannel `xml:"channel"`
+}
+
+// rssChannel holds the feed metadata and item list.
+type rssChannel struct {
+ Title string `xml:"title"`
+ Link string `xml:"link"`
+ Description string `xml:"description"`
+ Language string `xml:"language"`
+ Items []rssItem `xml:"item"`
}
// rssItem is one article entry from the RSS feed.
type rssItem struct {
Title string `xml:"title"`
Link string `xml:"link"`
+ GUID string `xml:"guid"`
Description string `xml:"description"`
PubDate string `xml:"pubDate"`
}
// Article is the public record type returned by Client.News.
type Article struct {
- Rank int `json:"rank"`
- Title string `json:"title"`
- Summary string `json:"summary"`
- PubDate string `json:"pub_date"`
- URL string `json:"url"`
+ Rank int `json:"rank" table:"RANK"`
+ Title string `json:"title" table:"TITLE"`
+ Summary string `json:"summary" table:"SUMMARY"`
+ Published string `json:"published" table:"DATE"`
+ URL string `json:"url" table:"URL"`
}