|
@@ -9,6 +9,7 @@ import (
|
9
|
9
|
"github.com/jinzhu/gorm"
|
10
|
10
|
_ "github.com/go-sql-driver/mysql"
|
11
|
11
|
"github.com/spf13/cobra"
|
|
12
|
+ "github.com/gocolly/colly"
|
12
|
13
|
"net/url"
|
13
|
14
|
"strconv"
|
14
|
15
|
"strings"
|
|
@@ -126,3 +127,50 @@ func SyncTtsOss(text string, speed float64, pitch float64) (bool, string) {
|
126
|
127
|
|
127
|
128
|
return true, ""
|
128
|
129
|
}
|
|
130
|
+
|
|
131
|
+func test() {
|
|
132
|
+
|
|
133
|
+ // Instantiate default collector
|
|
134
|
+ c := colly.NewCollector(
|
|
135
|
+ // Visit only domains: hackerspaces.org, wiki.hackerspaces.org
|
|
136
|
+ colly.AllowedDomains("dict.cn", "m.dict.cn"),
|
|
137
|
+ )
|
|
138
|
+
|
|
139
|
+ // On every a element which has href attribute call callback
|
|
140
|
+ c.OnHTML("div[class=sent]", func(e *colly.HTMLElement) {
|
|
141
|
+ //link := e.Attr("href")
|
|
142
|
+ // Print link
|
|
143
|
+ fmt.Printf("Link found: %q -> %s\n", e.Text)
|
|
144
|
+ // Visit link found on page
|
|
145
|
+ // Only those links are visited which are in AllowedDomains
|
|
146
|
+ //c.Visit(e.Request.AbsoluteURL(link))
|
|
147
|
+ })
|
|
148
|
+
|
|
149
|
+ /**
|
|
150
|
+ 1. div[class=sent]
|
|
151
|
+ 2. 例句 div[class=layout sort]
|
|
152
|
+ 3. 词法用法 div[class=section learn]
|
|
153
|
+ 4.
|
|
154
|
+ */
|
|
155
|
+
|
|
156
|
+ // Before making a request print "Visiting ..."
|
|
157
|
+ c.OnRequest(func(r *colly.Request) {
|
|
158
|
+ fmt.Println("Visiting", r.URL.String())
|
|
159
|
+ })
|
|
160
|
+
|
|
161
|
+ // Start scraping on https://hackerspaces.org
|
|
162
|
+ c.Visit("http://dict.cn/about")
|
|
163
|
+}
|
|
164
|
+
|
|
165
|
+func newTestCmd() *cobra.Command {
|
|
166
|
+ cmd := &cobra.Command{
|
|
167
|
+ Use: "test",
|
|
168
|
+ Short: "Run the test service",
|
|
169
|
+ Run: func(cmd *cobra.Command, args []string) {
|
|
170
|
+ //fmt.Println("Echo: " + strings.Join(args, " "))
|
|
171
|
+
|
|
172
|
+ test()
|
|
173
|
+ },
|
|
174
|
+ }
|
|
175
|
+ return cmd
|
|
176
|
+}
|