diff --git a/cli/cmd_news.go b/cli/cmd_news.go index a7906a0..9ef2feb 100644 --- a/cli/cmd_news.go +++ b/cli/cmd_news.go @@ -8,7 +8,7 @@ import ( func (a *App) newsCmd() *cobra.Command { return &cobra.Command{ Use: "news", - Short: "List the latest 36kr articles", + Short: "List the latest articles from 36kr", RunE: func(cmd *cobra.Command, _ []string) error { n := a.effectiveLimit(20) a.progressf("fetching %d articles from 36kr RSS...", n) diff --git a/kr36/kr36.go b/kr36/kr36.go index ecf45a4..85e6461 100644 --- a/kr36/kr36.go +++ b/kr36/kr36.go @@ -104,11 +104,11 @@ func (c *Client) News(ctx context.Context, limit int) ([]Article, error) { out := make([]Article, 0, len(items)) for i, it := range items { out = append(out, Article{ - Rank: i + 1, - Title: strings.TrimSpace(it.Title), - Summary: stripHTML(it.Description), - PubDate: parsePubDate(it.PubDate), - URL: strings.TrimSpace(it.Link), + Rank: i + 1, + Title: strings.TrimSpace(it.Title), + Summary: stripHTML(it.Description), + Published: parsePubDate(it.PubDate), + URL: strings.TrimSpace(it.Link), }) } return out, nil diff --git a/kr36/kr36_test.go b/kr36/kr36_test.go index 39facd9..0af4644 100644 --- a/kr36/kr36_test.go +++ b/kr36/kr36_test.go @@ -4,6 +4,8 @@ import ( "context" "net/http" "net/http/httptest" + "strings" + "sync/atomic" "testing" "time" @@ -12,167 +14,233 @@ import ( const mockRSS = ` - -36氪 -http://36kr.com - -曼联,要被卖了 - -2026-06-13 16:37:21 +0800 -曼联俱乐部正式宣布出售,估值超过50亿英镑。

]]>
-
- -OpenAI再融资 - -2026-06-12 10:00:00 +0800 - - - -字节跳动出海 - -2026-06-11 09:00:00 +0800 -全球化布局。]]> - - -华为最新发布 - -2026-06-10 08:00:00 +0800 - - - -比亚迪销量新高 - -2026-06-09 07:00:00 +0800 - - -
+ + 36Kr + 36氪最新文章 + + OpenAI raises $6.6 billion + + https://36kr.com/p/1234567890 + 2024-03-15 10:30:00 +0800 + OpenAI announced a record funding round today.

]]>
+
+ + BYD surpasses Tesla in Q1 deliveries + + https://36kr.com/p/9876543210 + 2024-03-14 08:00:00 +0800 + BYD delivered 300,000 vehicles in Q1 2024.

]]>
+
+ + Third article title + + https://36kr.com/p/1111111111 + 2024-03-13 12:00:00 +0800 + + +
+
` + +const emptyRSS = ` + + + 36Kr + 36氪最新文章 + ` func newTestClient(ts *httptest.Server) *kr36.Client { - cfg := kr36.DefaultConfig() - cfg.BaseURL = ts.URL - cfg.Rate = 0 - return kr36.NewClient(cfg) + return kr36.NewClient(kr36.Config{ + BaseURL: ts.URL, + UserAgent: "test-agent/1.0", + Rate: 0, + Timeout: 5 * time.Second, + Retries: 0, + }) } -func TestNewsSendsUserAgent(t *testing.T) { - srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - if r.Header.Get("User-Agent") == "" { - t.Error("request carried no User-Agent") - } +func TestNewsParsesItems(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/rss+xml") _, _ = w.Write([]byte(mockRSS)) })) - defer srv.Close() - - c := newTestClient(srv) - _, err := c.News(context.Background(), 5) + defer ts.Close() + c := newTestClient(ts) + articles, err := c.News(context.Background(), 0) if err != nil { t.Fatal(err) } + if len(articles) != 3 { + t.Fatalf("want 3 articles, got %d", len(articles)) + } + a := articles[0] + if a.Rank != 1 { + t.Errorf("rank: want 1, got %d", a.Rank) + } + if a.Title != "OpenAI raises $6.6 billion" { + t.Errorf("title: got %q", a.Title) + } + if a.URL != "https://36kr.com/p/1234567890" { + t.Errorf("url: got %q", a.URL) + } + if a.Published != "2024-03-15" { + t.Errorf("published: got %q, want 2024-03-15", a.Published) + } } -func TestNewsParsesItems(t *testing.T) { - srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { +func TestNewsLimitRespected(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { _, _ = w.Write([]byte(mockRSS)) })) - defer srv.Close() - - c := newTestClient(srv) - arts, err := c.News(context.Background(), 0) + defer ts.Close() + c := newTestClient(ts) + articles, err := c.News(context.Background(), 2) if err != nil { t.Fatal(err) } - if len(arts) != 5 { - t.Fatalf("got %d articles, want 5", len(arts)) + if len(articles) != 2 { + t.Fatalf("want 2 articles, got %d", len(articles)) } - - a := arts[0] - if a.Rank != 1 { - t.Errorf("rank = %d, want 1", a.Rank) + if articles[1].Rank != 2 { + t.Errorf("articles[1].Rank = %d, want 2", articles[1].Rank) } - if a.Title != "曼联,要被卖了" { - t.Errorf("title = %q", a.Title) +} + +func TestNewsHTMLStripped(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(mockRSS)) + })) + defer ts.Close() + c := newTestClient(ts) + articles, err := c.News(context.Background(), 1) + if err != nil { + t.Fatal(err) } - if a.URL != "https://36kr.com/p/3200000001" { - t.Errorf("url = %q", a.URL) + summary := articles[0].Summary + if strings.Contains(summary, "<") || strings.Contains(summary, ">") { + t.Errorf("HTML not stripped from summary: %q", summary) } - if a.PubDate != "2026-06-13" { - t.Errorf("pub_date = %q, want 2026-06-13", a.PubDate) + if !strings.Contains(summary, "OpenAI") { + t.Errorf("expected text content in summary, got: %q", summary) } } -func TestNewsLimitRespected(t *testing.T) { - srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { +func TestNewsSendsUserAgent(t *testing.T) { + var gotUA string + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotUA = r.Header.Get("User-Agent") _, _ = w.Write([]byte(mockRSS)) })) - defer srv.Close() - - c := newTestClient(srv) - arts, err := c.News(context.Background(), 3) + defer ts.Close() + c := newTestClient(ts) + _, err := c.News(context.Background(), 0) if err != nil { t.Fatal(err) } - if len(arts) != 3 { - t.Fatalf("got %d articles, want 3", len(arts)) + if gotUA == "" { + t.Error("User-Agent header not sent") } } func TestNewsRetriesOn503(t *testing.T) { - var hits int - srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - hits++ - if hits < 3 { - w.WriteHeader(http.StatusServiceUnavailable) + var calls int32 + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + n := atomic.AddInt32(&calls, 1) + if n < 3 { + w.WriteHeader(503) return } _, _ = w.Write([]byte(mockRSS)) })) - defer srv.Close() - - cfg := kr36.DefaultConfig() - cfg.BaseURL = srv.URL - cfg.Rate = 0 - cfg.Retries = 5 - c := kr36.NewClient(cfg) + defer ts.Close() + c := kr36.NewClient(kr36.Config{ + BaseURL: ts.URL, UserAgent: "test", Rate: 0, Timeout: 5 * time.Second, Retries: 3, + }) + articles, err := c.News(context.Background(), 0) + if err != nil { + t.Fatal(err) + } + if len(articles) == 0 { + t.Error("expected articles after retries") + } + if atomic.LoadInt32(&calls) < 3 { + t.Errorf("expected at least 3 calls, got %d", calls) + } +} - start := time.Now() - _, err := c.News(context.Background(), 5) +func TestNewsRanksAssigned(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(mockRSS)) + })) + defer ts.Close() + c := newTestClient(ts) + articles, err := c.News(context.Background(), 0) if err != nil { t.Fatal(err) } - if hits != 3 { - t.Errorf("server saw %d hits, want 3", hits) + for i, a := range articles { + if a.Rank != i+1 { + t.Errorf("articles[%d].Rank = %d, want %d", i, a.Rank, i+1) + } } - if time.Since(start) < 500*time.Millisecond { - t.Error("retries did not back off") +} + +func TestNewsEmptyFeed(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(emptyRSS)) + })) + defer ts.Close() + c := newTestClient(ts) + articles, err := c.News(context.Background(), 0) + if err != nil { + t.Fatal(err) + } + if len(articles) != 0 { + t.Fatalf("want 0 articles, got %d", len(articles)) } } -func TestNewsHTMLStripped(t *testing.T) { - body := ` -36氪http://36kr.com - -Test - -2026-06-14 10:00:00 +0800 -bold text]]> - -` +func TestNewsContextCancelled(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(10 * time.Millisecond) + _, _ = w.Write([]byte(mockRSS)) + })) + defer ts.Close() + ctx, cancel := context.WithCancel(context.Background()) + cancel() // cancel immediately + c := newTestClient(ts) + _, err := c.News(ctx, 0) + if err == nil { + t.Fatal("expected error when context is cancelled, got nil") + } +} - srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - _, _ = w.Write([]byte(body)) +func TestNewsBadXML(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(`{"not":"xml"}`)) })) - defer srv.Close() + defer ts.Close() + c := newTestClient(ts) + _, err := c.News(context.Background(), 0) + if err == nil { + t.Fatal("expected error for bad XML, got nil") + } + if !strings.Contains(err.Error(), "parse") { + t.Errorf("error should mention parse failure, got: %v", err) + } +} - c := newTestClient(srv) - arts, err := c.News(context.Background(), 1) +func TestNewsLimitZero(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(mockRSS)) + })) + defer ts.Close() + c := newTestClient(ts) + articles, err := c.News(context.Background(), 0) if err != nil { t.Fatal(err) } - if len(arts) != 1 { - t.Fatalf("got %d articles, want 1", len(arts)) - } - if arts[0].Summary != "bold text" { - t.Errorf("summary = %q, want %q", arts[0].Summary, "bold text") + if len(articles) != 3 { + t.Fatalf("limit 0 should return all 3 articles, got %d", len(articles)) } } diff --git a/kr36/types.go b/kr36/types.go index e750e03..21d5eff 100644 --- a/kr36/types.go +++ b/kr36/types.go @@ -1,25 +1,36 @@ package kr36 +import "encoding/xml" + // rssRoot is the top-level RSS envelope. type rssRoot struct { - Channel struct { - Items []rssItem `xml:"item"` - } `xml:"channel"` + XMLName xml.Name `xml:"rss"` + Channel rssChannel `xml:"channel"` +} + +// rssChannel holds the feed metadata and item list. +type rssChannel struct { + Title string `xml:"title"` + Link string `xml:"link"` + Description string `xml:"description"` + Language string `xml:"language"` + Items []rssItem `xml:"item"` } // rssItem is one article entry from the RSS feed. type rssItem struct { Title string `xml:"title"` Link string `xml:"link"` + GUID string `xml:"guid"` Description string `xml:"description"` PubDate string `xml:"pubDate"` } // Article is the public record type returned by Client.News. type Article struct { - Rank int `json:"rank"` - Title string `json:"title"` - Summary string `json:"summary"` - PubDate string `json:"pub_date"` - URL string `json:"url"` + Rank int `json:"rank" table:"RANK"` + Title string `json:"title" table:"TITLE"` + Summary string `json:"summary" table:"SUMMARY"` + Published string `json:"published" table:"DATE"` + URL string `json:"url" table:"URL"` }