Text to Speech Speech to Text

job.go 5.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. package job
  2. import (
  3. "fmt"
  4. "git.links123.net/Slate/CorpusAI/service"
  5. "git.links123.net/Slate/CorpusAI/service/store/cache"
  6. "git.links123.net/Slate/CorpusAI/service/store/mysql"
  7. "git.links123.net/Slate/CorpusAI/config"
  8. "github.com/jinzhu/gorm"
  9. _ "github.com/go-sql-driver/mysql"
  10. "github.com/spf13/cobra"
  11. "github.com/gocolly/colly"
  12. "net/url"
  13. "strings"
  14. "html"
  15. )
  16. type TtsRaw struct {
  17. ID int64
  18. Text string //翻译的文本
  19. UniqKey string //唯一键值
  20. Status int64
  21. Remark string
  22. }
  23. func (TtsRaw) TableName() string {
  24. return "lnk_corpus_tts_raw"
  25. }
  26. type langType struct {
  27. languageCode string;
  28. voiceName string;
  29. }
  30. type Phrase struct {
  31. Text string
  32. Paraphrase string
  33. Type int64
  34. Word string
  35. }
  36. func (Phrase) TableName() string {
  37. return "lnk_corpus_phrase_spider"
  38. }
  39. func RunCommand() *cobra.Command {
  40. cmd := &cobra.Command{
  41. Use: "job",
  42. Short: "Run the job service",
  43. Run: func(cmd *cobra.Command, args []string) {
  44. //fmt.Println("Echo: " + strings.Join(args, " "))
  45. minId := args[0]
  46. maxId := args[1]
  47. dbConfig := config.C.DB
  48. settings := dbConfig.User+":"+dbConfig.Password+"@tcp("+dbConfig.Host+")/"+dbConfig.Name+"?charset=utf8&parseTime=True&loc=Local"
  49. var ttsRawList []TtsRaw
  50. db, err := gorm.Open("mysql", settings)
  51. if err != nil {
  52. panic("failed to connect database")
  53. }
  54. db.Where("id <= ? and id >= ?", maxId, minId).Find(&ttsRawList)
  55. for _, ttsRaw := range ttsRawList {
  56. word := ttsRaw.Text
  57. c := colly.NewCollector(
  58. // Visit only domains: hackerspaces.org, wiki.hackerspaces.org
  59. colly.AllowedDomains("dict.cn", "m.dict.cn"),
  60. )
  61. // 例句
  62. c.OnHTML("div[class=\"layout sort\"]", func(e *colly.HTMLElement) {
  63. e.ForEach("ol", func(_ int, eol *colly.HTMLElement) {
  64. eol.ForEach("li", func(_ int, el *colly.HTMLElement) {
  65. //fmt.Println(el.DOM.Html())
  66. liText, _ := el.DOM.Html()
  67. liData := strings.Split(html.UnescapeString(liText), "<br/>")
  68. example := ReplaceTrim(liData[0])
  69. exampleParaphrase := ReplaceTrim(liData[1])
  70. examplePhrase := Phrase{Text:example, Paraphrase:exampleParaphrase, Type: 0, Word:word}
  71. fmt.Println(examplePhrase)
  72. db.Create(&examplePhrase)
  73. })
  74. })
  75. })
  76. // 词汇搭配, 短语
  77. c.OnHTML("div[class=\"layout coll\"]", func(e *colly.HTMLElement) {
  78. e.ForEach("li", func(_ int, el *colly.HTMLElement) {
  79. if el.ChildAttr("a", "href") != "" {
  80. phrase := ReplaceTrim(el.ChildText("a"))
  81. paraphrase := ReplaceTrim(el.Text)
  82. paraphrase = strings.Replace(paraphrase, phrase, "", 1)
  83. newPhrase := Phrase{Text:phrase, Paraphrase:paraphrase, Type: 1, Word:word}
  84. fmt.Println(newPhrase)
  85. db.Create(&newPhrase)
  86. }
  87. })
  88. })
  89. c.OnHTML("div[class=\"layout anno\"]", func(e *colly.HTMLElement) {
  90. e.ForEach("li", func(_ int, el *colly.HTMLElement) {
  91. if el.ChildAttr("a", "href") != "" {
  92. phrase := ReplaceTrim(el.ChildText("a"))
  93. paraphrase := ReplaceTrim(el.Text)
  94. paraphrase = strings.Replace(paraphrase, phrase, "", 1)
  95. newPhrase := Phrase{Text:phrase, Paraphrase:paraphrase, Type: 1, Word:word}
  96. fmt.Println(newPhrase)
  97. db.Create(&newPhrase)
  98. }
  99. })
  100. })
  101. c.OnRequest(func(r *colly.Request) {
  102. fmt.Println("Visiting", r.URL.String())
  103. })
  104. c.Visit("http://dict.cn/"+word)
  105. //if result == true {
  106. ttsRaw.Status = 1
  107. //更新成功
  108. //} else {
  109. // ttsRaw.Status = -1
  110. // //更新错误状态和msg
  111. // ttsRaw.Remark = msg
  112. //}
  113. fmt.Println(ttsRaw)
  114. db.Save(&ttsRaw)
  115. }
  116. db.Close()
  117. },
  118. }
  119. return cmd
  120. }
  121. func SyncTtsOss(text string, speed float64, pitch float64) (bool, string) {
  122. text, _ = url.QueryUnescape(text)
  123. text = strings.Trim(text , "")
  124. if text == "" {
  125. return false, "Error: text null"
  126. }
  127. typeMap := make(map[int]langType)
  128. typeMap[1] = langType{"en-US","en-US-Wavenet-B"}
  129. typeMap[2] = langType{"en-US","en-US-Wavenet-C"}
  130. typeMap[3] = langType{"en-GB","en-GB-Wavenet-B"}
  131. typeMap[4] = langType{"en-GB","en-GB-Wavenet-C"}
  132. for _, lang := range typeMap {
  133. ossObjectKey := service.GetTtsOssKey(text, lang.voiceName, lang.languageCode, speed, pitch)
  134. textKey := cache.GetTextKey(ossObjectKey)
  135. AudioContent, err := service.TextToSpeech(text, lang.voiceName, lang.languageCode, speed, pitch)
  136. if err != nil {
  137. return false, "TextToSpeech Error:" + err.Error()
  138. }
  139. uploadResult, err := service.UploadHkOss(ossObjectKey, AudioContent)
  140. if uploadResult == true {
  141. uploadResult, err = service.UploadOss(ossObjectKey, AudioContent)
  142. if uploadResult == true {
  143. //hk&cn节点oss都同步成功, set db
  144. mysql.CreateCorpusTts(text, textKey, lang.languageCode, lang.voiceName, ossObjectKey, speed, pitch)
  145. }
  146. }
  147. if err != nil {
  148. return false, "UploadHkOss Error" + err.Error()
  149. }
  150. }
  151. return true, ""
  152. }
  153. func ReplaceTrim(str string) string {
  154. str = strings.Replace(str, "\n", "", -1)
  155. str = strings.Replace(str, "\t", "", -1)
  156. return strings.Trim(str, "")
  157. }