package job import ( "crypto/md5" "encoding/json" "fmt" "git.links123.net/Slate/CorpusAI/service" "git.links123.net/Slate/CorpusAI/service/store/cache" "git.links123.net/Slate/CorpusAI/service/store/mysql" "git.links123.net/Slate/CorpusAI/config" "github.com/jinzhu/gorm" _ "github.com/go-sql-driver/mysql" "github.com/spf13/cobra" "io/ioutil" "log" "math/rand" "net/http" "net/url" "strconv" "strings" "time" ) type TtsRaw struct { ID int64 Text string //翻译的文本 UniqKey string //唯一键值 Status int64 Remark string } func (TtsRaw) TableName() string { return "lnk_corpus_tts_raw" } type langType struct { languageCode string; voiceName string; } type Phrase struct { Text string Paraphrase string Type int64 Word string } type Phonetic struct { ID int64 Word string //翻译的文本 UkPhonetic string //唯一键值 Status int64 UsPhonetic string } func (Phrase) TableName() string { return "lnk_corpus_phrase_spider" } func RunCommand() *cobra.Command { cmd := &cobra.Command{ Use: "job", Short: "Run the job service", Run: func(cmd *cobra.Command, args []string) { //fmt.Println("Echo: " + strings.Join(args, " ")) minId := args[0] maxId := args[1] dbConfig := config.C.DB settings := dbConfig.User+":"+dbConfig.Password+"@tcp("+dbConfig.Host+")/"+dbConfig.Name+"?charset=utf8&parseTime=True&loc=Local" var phoneticList []Phonetic db, err := gorm.Open("mysql", settings) if err != nil { panic("failed to connect database") } db.Where("id <= ? and id >= ?", maxId, minId).Find(&phoneticList) for _, ttsRaw := range phoneticList { word := ttsRaw.Word Phonetic := transApi(word, "", "") //if result == true { ttsRaw.Status = 1 ttsRaw.UkPhonetic = Phonetic["uk_phonetic"] ttsRaw.UsPhonetic = Phonetic["us_phonetic"] fmt.Println(ttsRaw) db.Save(&ttsRaw) } db.Close() //return //var ttsRawList []TtsRaw // //db, err := gorm.Open("mysql", settings) //if err != nil { // panic("failed to connect database") //} // //db.Where("id <= ? and id >= ?", maxId, minId).Find(&ttsRawList) // //for _, ttsRaw := range ttsRawList { // // word := ttsRaw.Text // c := colly.NewCollector( // // Visit only domains: hackerspaces.org, wiki.hackerspaces.org // colly.AllowedDomains("dict.cn", "m.dict.cn"), // ) // // // 例句 // c.OnHTML("div[class=\"layout sort\"]", func(e *colly.HTMLElement) { // e.ForEach("ol", func(_ int, eol *colly.HTMLElement) { // eol.ForEach("li", func(_ int, el *colly.HTMLElement) { // //fmt.Println(el.DOM.Html()) // liText, _ := el.DOM.Html() // liData := strings.Split(html.UnescapeString(liText), "
") // example := ReplaceTrim(liData[0]) // exampleParaphrase := ReplaceTrim(liData[1]) // // examplePhrase := Phrase{Text:example, Paraphrase:exampleParaphrase, Type: 0, Word:word} // fmt.Println(examplePhrase) // db.Create(&examplePhrase) // }) // }) // }) // // // 词汇搭配, 短语 // c.OnHTML("div[class=\"layout coll\"]", func(e *colly.HTMLElement) { // // e.ForEach("li", func(_ int, el *colly.HTMLElement) { // if el.ChildAttr("a", "href") != "" { // phrase := ReplaceTrim(el.ChildText("a")) // paraphrase := ReplaceTrim(el.Text) // paraphrase = strings.Replace(paraphrase, phrase, "", 1) // // newPhrase := Phrase{Text:phrase, Paraphrase:paraphrase, Type: 1, Word:word} // fmt.Println(newPhrase) // db.Create(&newPhrase) // } // }) // }) // // c.OnHTML("div[class=\"layout anno\"]", func(e *colly.HTMLElement) { // // e.ForEach("li", func(_ int, el *colly.HTMLElement) { // if el.ChildAttr("a", "href") != "" { // phrase := ReplaceTrim(el.ChildText("a")) // paraphrase := ReplaceTrim(el.Text) // paraphrase = strings.Replace(paraphrase, phrase, "", 1) // // newPhrase := Phrase{Text:phrase, Paraphrase:paraphrase, Type: 1, Word:word} // fmt.Println(newPhrase) // db.Create(&newPhrase) // } // }) // }) // c.OnRequest(func(r *colly.Request) { // fmt.Println("Visiting", r.URL.String()) // }) // // c.Visit("http://dict.cn/"+word) // // //if result == true { // ttsRaw.Status = 1 // //更新成功 // //} else { // // ttsRaw.Status = -1 // // //更新错误状态和msg // // ttsRaw.Remark = msg // //} // fmt.Println(ttsRaw) // db.Save(&ttsRaw) //} db.Close() }, } return cmd } func SyncTtsOss(text string, speed float64, pitch float64) (bool, string) { text, _ = url.QueryUnescape(text) text = strings.Trim(text , "") if text == "" { return false, "Error: text null" } typeMap := make(map[int]langType) typeMap[1] = langType{"en-US","en-US-Wavenet-B"} typeMap[2] = langType{"en-US","en-US-Wavenet-C"} typeMap[3] = langType{"en-GB","en-GB-Wavenet-B"} typeMap[4] = langType{"en-GB","en-GB-Wavenet-C"} for _, lang := range typeMap { ossObjectKey := service.GetTtsOssKey(text, lang.voiceName, lang.languageCode, speed, pitch) textKey := cache.GetTextKey(ossObjectKey) AudioContent, err := service.TextToSpeech(text, lang.voiceName, lang.languageCode, speed, pitch) if err != nil { return false, "TextToSpeech Error:" + err.Error() } uploadResult, err := service.UploadHkOss(ossObjectKey, AudioContent) if uploadResult == true { uploadResult, err = service.UploadOss(ossObjectKey, AudioContent) if uploadResult == true { //hk&cn节点oss都同步成功, set db mysql.CreateCorpusTts(text, textKey, lang.languageCode, lang.voiceName, ossObjectKey, speed, pitch) } } if err != nil { return false, "UploadHkOss Error" + err.Error() } } return true, "" } func ReplaceTrim(str string) string { str = strings.Replace(str, "\n", "", -1) str = strings.Replace(str, "\t", "", -1) return strings.Trim(str, "") } func transApi(text, from, to string) map[string]string { apiUrl := "http://openapi.youdao.com/api" appKey := "629a1435e6d2a894" secKey := "DAgjDJfE0xPdZtMVhl1YUFUIrZc2DVHd" basicMap := make (map[string]string) if from == "" { from = "auto" } if to == "" { to = "auto" } rand.Seed(time.Now().Unix()) salt := strconv.Itoa(rand.Int()) sign := buildSign(appKey, text, salt, secKey) request, err := http.NewRequest("GET", apiUrl, nil) if err != nil { log.Print(err) return basicMap } query := request.URL.Query() query.Add("q", text) query.Add("from", from) query.Add("to", to) query.Add("appKey", appKey) query.Add("salt", salt) query.Add("sign", sign) request.URL.RawQuery = query.Encode() //fmt.Println(request.URL.String()) var resp *http.Response resp, err = http.DefaultClient.Do(request) if err != nil { log.Print(err) return basicMap } defer resp.Body.Close() body, _ := ioutil.ReadAll(resp.Body) result := map[string]interface{}{} json.Unmarshal(body, &result) basic := result["basic"].(map[string]string) //fmt.Println(basic["phonetic"],basic["uk-phonetic"],basic["us-phonetic"]) basicMap["uk-phonetic"] = basic["uk-phonetic"] basicMap["us-phonetic"] = basic["us-phonetic"] return basicMap } func buildSign(appKey, text, salt, secKey string) string { data := []byte(appKey + text + salt + secKey) hash := md5.Sum(data) return fmt.Sprintf("%x", hash) }