1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- package common
- import (
- "encoding/json"
- "github.com/go-rod/rod/lib/utils"
- "os"
- "strings"
- )
// GetCollectPDFPath builds the path of the PDF file used for collection,
// "./data/collection/<district>/<taxNo>_<taxTape>.pdf" (relative to the
// current working directory), creating the district directory and any
// missing parents first.
//
// On success it returns the file path and a nil error. If the directory
// cannot be created it returns an empty path together with the error from
// os.MkdirAll. The PDF file itself is not created.
func GetCollectPDFPath(district, taxNo, taxTape string) (string, error) {
	dir := "./data/collection/" + district + "/"

	// os.MkdirAll is a no-op when the directory already exists, so a separate
	// existence check is unnecessary; calling it unconditionally also reports
	// the case where the path exists but is not a directory.
	if err := os.MkdirAll(dir, os.ModePerm); err != nil {
		return "", err
	}

	return dir + taxNo + "_" + taxTape + ".pdf", nil
}
- // PDFtoStrArrayPython pdf转二维数组 python服务
- func PDFtoStrArrayPython(path string) ([][]string, error) {
- var code [][]string
- files := map[string]string{
- "file": path,
- }
- var bys []byte
- var err error
- pdfServers := []string{
- `http://47.104.75.113:6000/pdfToText`,
- `http://47.104.75.113:6000/pdfToText`,
- }
- for _, uri := range pdfServers {
- bys, err = PostFile(uri, files, map[string]string{})
- if err != nil {
- utils.Sleep(10)
- continue
- }
- break
- }
- if err != nil {
- return code, err
- }
- json.Unmarshal(bys, &code)
- return code, nil
- }
- // UploadPdfToStrArray java PDF转数组
- func UploadPdfToStrArray(path string) ([]string, error) {
- f := 0
- begin:
- url := "http://47.104.75.113:8080/api/uploadPdfToText" //灵信服务器
- files := map[string]string{
- "file": path,
- }
- resp, err := PostFile(url, files, map[string]string{})
- arr := []string{}
- if err != nil {
- if f < 3 {
- f++
- utils.Sleep(10)
- goto begin
- }
- return arr, err
- }
- type resut struct {
- Text string `json:"text"`
- }
- var Result resut
- if err := json.Unmarshal(resp, &Result); err != nil {
- return arr, err
- }
- arr = strings.Split(Result.Text, "\n")
- return arr, nil
- }
|