123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316 |
- package job
-
- import (
- "crypto/md5"
- "encoding/json"
- "fmt"
- "git.links123.net/Slate/CorpusAI/service"
- "git.links123.net/Slate/CorpusAI/service/store/cache"
- "git.links123.net/Slate/CorpusAI/service/store/mysql"
- "git.links123.net/Slate/CorpusAI/config"
- "github.com/jinzhu/gorm"
- _ "github.com/go-sql-driver/mysql"
- "github.com/spf13/cobra"
- "io/ioutil"
- "log"
- "math/rand"
- "net/http"
- "net/url"
- "strconv"
- "strings"
- "time"
- )
-
// TtsRaw describes one row of the lnk_corpus_tts_raw table (see TableName).
type TtsRaw struct {
	ID      int64
	Text    string // text being translated
	UniqKey string // unique key value
	Status  int64
	Remark  string
}

// TableName reports the database table that stores TtsRaw rows.
func (TtsRaw) TableName() string {
	const table = "lnk_corpus_tts_raw"
	return table
}
-
// langType pairs a language code with the name of a synthesis voice for that
// language.
type langType struct {
	languageCode string // e.g. "en-US"
	voiceName    string // e.g. "en-US-Wavenet-B"
}
-
-
// Phrase describes one row of the lnk_corpus_phrase_spider table (see the
// TableName method on this type).
type Phrase struct {
	Text       string // the phrase or example sentence itself
	Paraphrase string // explanation / translation that accompanies Text
	Type       int64  // entry kind; presumably 0 = example sentence, 1 = collocation — TODO confirm
	Word       string // headword the entry was collected for
}
-
// Phonetic is the row type the job command loads by ID range, fills in with
// phonetic transcriptions and saves back.
//
// NOTE(review): no TableName method is defined for this type in the visible
// file, so the table name is presumably left to gorm's default naming — confirm.
type Phonetic struct {
	ID         int64
	Word       string // word to look up
	UkPhonetic string // UK phonetic transcription returned by the lookup
	Status     int64  // the job command sets this to 1 when it stores phonetics
	UsPhonetic string // US phonetic transcription returned by the lookup
}
-
- func (Phrase) TableName() string {
- return "lnk_corpus_phrase_spider"
- }
-
- func RunCommand() *cobra.Command {
- cmd := &cobra.Command{
- Use: "job",
- Short: "Run the job service",
- Run: func(cmd *cobra.Command, args []string) {
- //fmt.Println("Echo: " + strings.Join(args, " "))
-
- minId := args[0]
- maxId := args[1]
-
- dbConfig := config.C.DB
- settings := dbConfig.User+":"+dbConfig.Password+"@tcp("+dbConfig.Host+")/"+dbConfig.Name+"?charset=utf8&parseTime=True&loc=Local"
-
- var phoneticList []Phonetic
-
- db, err := gorm.Open("mysql", settings)
- if err != nil {
- panic("failed to connect database")
- }
-
- db.Where("id <= ? and id >= ?", maxId, minId).Find(&phoneticList)
-
- for _, ttsRaw := range phoneticList {
-
- word := ttsRaw.Word
-
- Phonetic := transApi(word, "", "")
- //if result == true {
- ttsRaw.Status = 1
- ttsRaw.UkPhonetic = Phonetic["uk_phonetic"]
- ttsRaw.UsPhonetic = Phonetic["us_phonetic"]
-
- fmt.Println(ttsRaw)
- db.Save(&ttsRaw)
- }
-
- db.Close()
-
- //return
- //var ttsRawList []TtsRaw
- //
- //db, err := gorm.Open("mysql", settings)
- //if err != nil {
- // panic("failed to connect database")
- //}
- //
- //db.Where("id <= ? and id >= ?", maxId, minId).Find(&ttsRawList)
- //
- //for _, ttsRaw := range ttsRawList {
- //
- // word := ttsRaw.Text
- // c := colly.NewCollector(
- // // Visit only domains: hackerspaces.org, wiki.hackerspaces.org
- // colly.AllowedDomains("dict.cn", "m.dict.cn"),
- // )
- //
- // // 例句
- // c.OnHTML("div[class=\"layout sort\"]", func(e *colly.HTMLElement) {
- // e.ForEach("ol", func(_ int, eol *colly.HTMLElement) {
- // eol.ForEach("li", func(_ int, el *colly.HTMLElement) {
- // //fmt.Println(el.DOM.Html())
- // liText, _ := el.DOM.Html()
- // liData := strings.Split(html.UnescapeString(liText), "<br/>")
- // example := ReplaceTrim(liData[0])
- // exampleParaphrase := ReplaceTrim(liData[1])
- //
- // examplePhrase := Phrase{Text:example, Paraphrase:exampleParaphrase, Type: 0, Word:word}
- // fmt.Println(examplePhrase)
- // db.Create(&examplePhrase)
- // })
- // })
- // })
- //
- // // 词汇搭配, 短语
- // c.OnHTML("div[class=\"layout coll\"]", func(e *colly.HTMLElement) {
- //
- // e.ForEach("li", func(_ int, el *colly.HTMLElement) {
- // if el.ChildAttr("a", "href") != "" {
- // phrase := ReplaceTrim(el.ChildText("a"))
- // paraphrase := ReplaceTrim(el.Text)
- // paraphrase = strings.Replace(paraphrase, phrase, "", 1)
- //
- // newPhrase := Phrase{Text:phrase, Paraphrase:paraphrase, Type: 1, Word:word}
- // fmt.Println(newPhrase)
- // db.Create(&newPhrase)
- // }
- // })
- // })
- //
- // c.OnHTML("div[class=\"layout anno\"]", func(e *colly.HTMLElement) {
- //
- // e.ForEach("li", func(_ int, el *colly.HTMLElement) {
- // if el.ChildAttr("a", "href") != "" {
- // phrase := ReplaceTrim(el.ChildText("a"))
- // paraphrase := ReplaceTrim(el.Text)
- // paraphrase = strings.Replace(paraphrase, phrase, "", 1)
- //
- // newPhrase := Phrase{Text:phrase, Paraphrase:paraphrase, Type: 1, Word:word}
- // fmt.Println(newPhrase)
- // db.Create(&newPhrase)
- // }
- // })
- // })
- // c.OnRequest(func(r *colly.Request) {
- // fmt.Println("Visiting", r.URL.String())
- // })
- //
- // c.Visit("http://dict.cn/"+word)
- //
- // //if result == true {
- // ttsRaw.Status = 1
- // //更新成功
- // //} else {
- // // ttsRaw.Status = -1
- // // //更新错误状态和msg
- // // ttsRaw.Remark = msg
- // //}
- // fmt.Println(ttsRaw)
- // db.Save(&ttsRaw)
- //}
-
- db.Close()
- },
- }
- return cmd
- }
-
- func SyncTtsOss(text string, speed float64, pitch float64) (bool, string) {
-
- text, _ = url.QueryUnescape(text)
-
- text = strings.Trim(text , "")
-
- if text == "" {
- return false, "Error: text null"
- }
-
- typeMap := make(map[int]langType)
- typeMap[1] = langType{"en-US","en-US-Wavenet-B"}
- typeMap[2] = langType{"en-US","en-US-Wavenet-C"}
- typeMap[3] = langType{"en-GB","en-GB-Wavenet-B"}
- typeMap[4] = langType{"en-GB","en-GB-Wavenet-C"}
-
- for _, lang := range typeMap {
-
- ossObjectKey := service.GetTtsOssKey(text, lang.voiceName, lang.languageCode, speed, pitch)
-
- textKey := cache.GetTextKey(ossObjectKey)
-
- AudioContent, err := service.TextToSpeech(text, lang.voiceName, lang.languageCode, speed, pitch)
- if err != nil {
-
- return false, "TextToSpeech Error:" + err.Error()
- }
-
- uploadResult, err := service.UploadHkOss(ossObjectKey, AudioContent)
-
- if uploadResult == true {
- uploadResult, err = service.UploadOss(ossObjectKey, AudioContent)
-
- if uploadResult == true {
-
- //hk&cn节点oss都同步成功, set db
- mysql.CreateCorpusTts(text, textKey, lang.languageCode, lang.voiceName, ossObjectKey, speed, pitch)
- }
- }
-
- if err != nil {
-
- return false, "UploadHkOss Error" + err.Error()
- }
- }
-
- return true, ""
- }
-
// ReplaceTrim removes every newline and tab character from str and then
// strips leading and trailing white space from what remains.
func ReplaceTrim(str string) string {
	str = strings.Replace(str, "\n", "", -1)
	str = strings.Replace(str, "\t", "", -1)

	// strings.Trim(str, "") has an empty cutset and returns str unchanged;
	// TrimSpace performs the trimming the function name promises.
	return strings.TrimSpace(str)
}
-
-
- func transApi(text, from, to string) map[string]string {
-
- apiUrl := "http://openapi.youdao.com/api"
- appKey := "629a1435e6d2a894"
- secKey := "DAgjDJfE0xPdZtMVhl1YUFUIrZc2DVHd"
-
- basicMap := make (map[string]string)
-
- if from == "" {
- from = "auto"
- }
-
- if to == "" {
- to = "auto"
- }
-
- rand.Seed(time.Now().Unix())
- salt := strconv.Itoa(rand.Int())
-
- sign := buildSign(appKey, text, salt, secKey)
-
- request, err := http.NewRequest("GET", apiUrl, nil)
- if err != nil {
- log.Print(err)
-
- return basicMap
- }
-
- query := request.URL.Query()
- query.Add("q", text)
- query.Add("from", from)
- query.Add("to", to)
- query.Add("appKey", appKey)
- query.Add("salt", salt)
- query.Add("sign", sign)
-
- request.URL.RawQuery = query.Encode()
-
- //fmt.Println(request.URL.String())
-
- var resp *http.Response
- resp, err = http.DefaultClient.Do(request)
- if err != nil {
- log.Print(err)
-
- return basicMap
- }
-
- defer resp.Body.Close()
-
- body, _ := ioutil.ReadAll(resp.Body)
-
- result := map[string]interface{}{}
- json.Unmarshal(body, &result)
-
- basic := result["basic"].(map[string]string)
- //fmt.Println(basic["phonetic"],basic["uk-phonetic"],basic["us-phonetic"])
-
- basicMap["uk-phonetic"] = basic["uk-phonetic"]
- basicMap["us-phonetic"] = basic["us-phonetic"]
-
- return basicMap
- }
-
// buildSign returns the lowercase hexadecimal MD5 digest of appKey, text,
// salt and secKey concatenated in that order.
func buildSign(appKey, text, salt, secKey string) string {
	digest := md5.New()
	for _, part := range [...]string{appKey, text, salt, secKey} {
		// hash.Hash.Write never returns an error.
		digest.Write([]byte(part))
	}

	return fmt.Sprintf("%x", digest.Sum(nil))
}
|