| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176 | 
							- package job
 - 
 - import (
 - 	"fmt"
 - 	"git.links123.net/Slate/CorpusAI/service"
 - 	"git.links123.net/Slate/CorpusAI/service/store/cache"
 - 	"git.links123.net/Slate/CorpusAI/service/store/mysql"
 - 	"git.links123.net/Slate/CorpusAI/config"
 - 	"github.com/jinzhu/gorm"
 - 	_ "github.com/go-sql-driver/mysql"
 - 	"github.com/spf13/cobra"
 - 	"github.com/gocolly/colly"
 - 	"net/url"
 - 	"strconv"
 - 	"strings"
 - )
 - 
 - type TtsRaw struct {
 - 	ID                     int64
 - 	Text                   string		//翻译的文本
 - 	UniqKey                string 	//唯一键值
 - 	Status				   int64
 - 	Remark                 string
 - }
 - 
 - func (TtsRaw) TableName() string {
 - 	return "lnk_corpus_tts_raw"
 - }
 - 
 - type langType struct {
 - 	languageCode string;
 - 	voiceName string;
 - }
 - 
 - func RunCommand() *cobra.Command {
 - 	cmd := &cobra.Command{
 - 		Use:   "job",
 - 		Short: "Run the job service",
 - 		Run: func(cmd *cobra.Command, args []string) {
 - 			//fmt.Println("Echo: " + strings.Join(args, " "))
 - 
 - 			minId := args[0]
 - 			maxId := args[1]
 - 
 - 			dbConfig := config.C.DB
 - 			settings := dbConfig.User+":"+dbConfig.Password+"@tcp("+dbConfig.Host+")/"+dbConfig.Name+"?charset=utf8&parseTime=True&loc=Local"
 - 
 - 			var ttsRawList []TtsRaw
 - 
 - 			db, err := gorm.Open("mysql", settings)
 - 			if err != nil {
 - 				panic("failed to connect database")
 - 			}
 - 
 - 			speed, _ := strconv.ParseFloat( "1.00", 64)
 - 			pitch, _ := strconv.ParseFloat( "0.00", 64)
 - 
 - 			db.Where("id <= ? and id >= ?", maxId, minId).Find(&ttsRawList)
 - 
 - 			for _, ttsRaw := range ttsRawList {
 - 
 - 				result, msg := SyncTtsOss(ttsRaw.Text, speed, pitch)
 - 
 - 				if result == true {
 - 					ttsRaw.Status = 1
 - 					//更新成功
 - 				} else {
 - 					ttsRaw.Status = -1
 - 					//更新错误状态和msg
 - 					ttsRaw.Remark = msg
 - 				}
 - 				fmt.Println(ttsRaw)
 - 				db.Save(&ttsRaw)
 - 			}
 - 
 - 			db.Close()
 - 		},
 - 	}
 - 	return cmd
 - }
 - 
 - func SyncTtsOss(text string, speed float64, pitch float64) (bool, string) {
 - 
 - 	text, _ = url.QueryUnescape(text)
 - 
 - 	text = strings.Trim(text , "")
 - 
 - 	if text == "" {
 - 		return false, "Error: text null"
 - 	}
 - 
 - 	typeMap := make(map[int]langType)
 - 	typeMap[1] = langType{"en-US","en-US-Wavenet-B"}
 - 	typeMap[2] = langType{"en-US","en-US-Wavenet-C"}
 - 	typeMap[3] = langType{"en-GB","en-GB-Wavenet-B"}
 - 	typeMap[4] = langType{"en-GB","en-GB-Wavenet-C"}
 - 
 - 	for _, lang := range typeMap {
 - 
 - 		ossObjectKey := service.GetTtsOssKey(text, lang.voiceName, lang.languageCode, speed, pitch)
 - 
 - 		textKey := cache.GetTextKey(ossObjectKey)
 - 
 - 		AudioContent, err := service.TextToSpeech(text, lang.voiceName, lang.languageCode, speed, pitch)
 - 		if err != nil {
 - 
 - 			return false, "TextToSpeech Error:" + err.Error()
 - 		}
 - 
 - 		uploadResult, err := service.UploadHkOss(ossObjectKey, AudioContent)
 - 
 - 		if uploadResult == true {
 - 			uploadResult, err = service.UploadOss(ossObjectKey, AudioContent)
 - 
 - 			if uploadResult == true {
 - 
 - 				//hk&cn节点oss都同步成功, set db
 - 				mysql.CreateCorpusTts(text, textKey, lang.languageCode, lang.voiceName, ossObjectKey, speed, pitch)
 - 			}
 - 		}
 - 
 - 		if err != nil {
 - 
 - 			return false, "UploadHkOss Error" + err.Error()
 - 		}
 - 	}
 - 
 - 	return true, ""
 - }
 - 
 - func test() {
 - 
 - 	// Instantiate default collector
 - 	c := colly.NewCollector(
 - 		// Visit only domains: hackerspaces.org, wiki.hackerspaces.org
 - 		colly.AllowedDomains("dict.cn", "m.dict.cn"),
 - 	)
 - 
 - 	// On every a element which has href attribute call callback
 - 	c.OnHTML("div[class=sent]", func(e *colly.HTMLElement) {
 - 		//link := e.Attr("href")
 - 		// Print link
 - 		fmt.Printf("Link found: %q -> %s\n", e.Text)
 - 		// Visit link found on page
 - 		// Only those links are visited which are in AllowedDomains
 - 		//c.Visit(e.Request.AbsoluteURL(link))
 - 	})
 - 
 - 	/**
 - 	1. div[class=sent]
 - 	2. 例句 div[class=layout sort]
 - 	3. 词法用法 div[class=section learn]
 - 	4.
 - 	 */
 - 
 - 	// Before making a request print "Visiting ..."
 - 	c.OnRequest(func(r *colly.Request) {
 - 		fmt.Println("Visiting", r.URL.String())
 - 	})
 - 
 - 	// Start scraping on https://hackerspaces.org
 - 	c.Visit("http://dict.cn/about")
 - }
 - 
 - func newTestCmd() *cobra.Command {
 - 	cmd := &cobra.Command{
 - 		Use:   "test",
 - 		Short: "Run the test service",
 - 		Run: func(cmd *cobra.Command, args []string) {
 - 			//fmt.Println("Echo: " + strings.Join(args, " "))
 - 
 - 			test()
 - 		},
 - 	}
 - 	return cmd
 - }
 
 
  |