|
@@ -0,0 +1,56 @@
|
|
1
|
+package job
|
|
2
|
+
|
|
3
|
+import (
|
|
4
|
+ "fmt"
|
|
5
|
+ _ "github.com/go-sql-driver/mysql"
|
|
6
|
+ "github.com/spf13/cobra"
|
|
7
|
+ "github.com/gocolly/colly"
|
|
8
|
+)
|
|
9
|
+
|
|
10
|
+func test() {
|
|
11
|
+
|
|
12
|
+ // Instantiate default collector
|
|
13
|
+ c := colly.NewCollector(
|
|
14
|
+ // Visit only domains: hackerspaces.org, wiki.hackerspaces.org
|
|
15
|
+ colly.AllowedDomains("dict.cn", "m.dict.cn"),
|
|
16
|
+ )
|
|
17
|
+
|
|
18
|
+ // On every a element which has href attribute call callback
|
|
19
|
+ c.OnHTML("div[class=sent]", func(e *colly.HTMLElement) {
|
|
20
|
+ //link := e.Attr("href")
|
|
21
|
+ // Print link
|
|
22
|
+ fmt.Printf("Link found: %q -> %s\n", e.Text)
|
|
23
|
+ // Visit link found on page
|
|
24
|
+ // Only those links are visited which are in AllowedDomains
|
|
25
|
+ //c.Visit(e.Request.AbsoluteURL(link))
|
|
26
|
+ })
|
|
27
|
+
|
|
28
|
+ /**
|
|
29
|
+ 1. div[class=sent]
|
|
30
|
+ 2. 例句 div[class=layout sort]
|
|
31
|
+ 3. 词法用法 div[class=section learn]
|
|
32
|
+ 4.
|
|
33
|
+ */
|
|
34
|
+
|
|
35
|
+ // Before making a request print "Visiting ..."
|
|
36
|
+ c.OnRequest(func(r *colly.Request) {
|
|
37
|
+ fmt.Println("Visiting", r.URL.String())
|
|
38
|
+ })
|
|
39
|
+
|
|
40
|
+ // Start scraping on https://hackerspaces.org
|
|
41
|
+ c.Visit("http://dict.cn/about")
|
|
42
|
+}
|
|
43
|
+
|
|
44
|
+func newTestCmd() *cobra.Command {
|
|
45
|
+ cmd := &cobra.Command{
|
|
46
|
+ Use: "test",
|
|
47
|
+ Short: "Run the test service",
|
|
48
|
+ Run: func(cmd *cobra.Command, args []string) {
|
|
49
|
+ //fmt.Println("Echo: " + strings.Join(args, " "))
|
|
50
|
+
|
|
51
|
+ test()
|
|
52
|
+ },
|
|
53
|
+ }
|
|
54
|
+ return cmd
|
|
55
|
+}
|
|
56
|
+
|