htmlbasic.go 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. /*
  2. * Copyright (c) 2014 Kurt Jung (Gmail: kurt.w.jung)
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. package gofpdf
  17. import (
  18. "regexp"
  19. "strings"
  20. )
  21. // HTMLBasicSegmentType defines a segment of literal text in which the current
  22. // attributes do not vary, or an open tag or a close tag.
  23. type HTMLBasicSegmentType struct {
  24. Cat byte // 'O' open tag, 'C' close tag, 'T' text
  25. Str string // Literal text unchanged, tags are lower case
  26. Attr map[string]string // Attribute keys are lower case
  27. }
  28. // HTMLBasicTokenize returns a list of HTML tags and literal elements. This is
  29. // done with regular expressions, so the result is only marginally better than
  30. // useless.
  31. func HTMLBasicTokenize(htmlStr string) (list []HTMLBasicSegmentType) {
  32. // This routine is adapted from http://www.fpdf.org/
  33. list = make([]HTMLBasicSegmentType, 0, 16)
  34. htmlStr = strings.Replace(htmlStr, "\n", " ", -1)
  35. htmlStr = strings.Replace(htmlStr, "\r", "", -1)
  36. tagRe, _ := regexp.Compile(`(?U)<.*>`)
  37. attrRe, _ := regexp.Compile(`([^=]+)=["']?([^"']+)`)
  38. capList := tagRe.FindAllStringIndex(htmlStr, -1)
  39. if capList != nil {
  40. var seg HTMLBasicSegmentType
  41. var parts []string
  42. pos := 0
  43. for _, cap := range capList {
  44. if pos < cap[0] {
  45. seg.Cat = 'T'
  46. seg.Str = htmlStr[pos:cap[0]]
  47. seg.Attr = nil
  48. list = append(list, seg)
  49. }
  50. if htmlStr[cap[0]+1] == '/' {
  51. seg.Cat = 'C'
  52. seg.Str = strings.ToLower(htmlStr[cap[0]+2 : cap[1]-1])
  53. seg.Attr = nil
  54. list = append(list, seg)
  55. } else {
  56. // Extract attributes
  57. parts = strings.Split(htmlStr[cap[0]+1:cap[1]-1], " ")
  58. if len(parts) > 0 {
  59. for j, part := range parts {
  60. if j == 0 {
  61. seg.Cat = 'O'
  62. seg.Str = strings.ToLower(parts[0])
  63. seg.Attr = make(map[string]string)
  64. } else {
  65. attrList := attrRe.FindAllStringSubmatch(part, -1)
  66. if attrList != nil {
  67. for _, attr := range attrList {
  68. seg.Attr[strings.ToLower(attr[1])] = attr[2]
  69. }
  70. }
  71. }
  72. }
  73. list = append(list, seg)
  74. }
  75. }
  76. pos = cap[1]
  77. }
  78. if len(htmlStr) > pos {
  79. seg.Cat = 'T'
  80. seg.Str = htmlStr[pos:]
  81. seg.Attr = nil
  82. list = append(list, seg)
  83. }
  84. } else {
  85. list = append(list, HTMLBasicSegmentType{Cat: 'T', Str: htmlStr, Attr: nil})
  86. }
  87. return
  88. }
  89. // HTMLBasicType is used for rendering a very basic subset of HTML. It supports
  90. // only hyperlinks and bold, italic and underscore attributes. In the Link
  91. // structure, the ClrR, ClrG and ClrB fields (0 through 255) define the color
  92. // of hyperlinks. The Bold, Italic and Underscore values define the hyperlink
  93. // style.
  94. type HTMLBasicType struct {
  95. pdf *Fpdf
  96. Link struct {
  97. ClrR, ClrG, ClrB int
  98. Bold, Italic, Underscore bool
  99. }
  100. }
  101. // HTMLBasicNew returns an instance that facilitates writing basic HTML in the
  102. // specified PDF file.
  103. func (f *Fpdf) HTMLBasicNew() (html HTMLBasicType) {
  104. html.pdf = f
  105. html.Link.ClrR, html.Link.ClrG, html.Link.ClrB = 0, 0, 128
  106. html.Link.Bold, html.Link.Italic, html.Link.Underscore = false, false, true
  107. return
  108. }
  109. // Write prints text from the current position using the currently selected
  110. // font. See HTMLBasicNew() to create a receiver that is associated with the
  111. // PDF document instance. The text can be encoded with a basic subset of HTML
  112. // that includes hyperlinks and tags for italic (I), bold (B), underscore
  113. // (U) and center (CENTER) attributes. When the right margin is reached a line
  114. // break occurs and text continues from the left margin. Upon method exit, the
  115. // current position is left at the end of the text.
  116. //
  117. // lineHt indicates the line height in the unit of measure specified in New().
  118. func (html *HTMLBasicType) Write(lineHt float64, htmlStr string) {
  119. var boldLvl, italicLvl, underscoreLvl, linkBold, linkItalic, linkUnderscore int
  120. var textR, textG, textB = html.pdf.GetTextColor()
  121. var hrefStr string
  122. if html.Link.Bold {
  123. linkBold = 1
  124. }
  125. if html.Link.Italic {
  126. linkItalic = 1
  127. }
  128. if html.Link.Underscore {
  129. linkUnderscore = 1
  130. }
  131. setStyle := func(boldAdj, italicAdj, underscoreAdj int) {
  132. styleStr := ""
  133. boldLvl += boldAdj
  134. if boldLvl > 0 {
  135. styleStr += "B"
  136. }
  137. italicLvl += italicAdj
  138. if italicLvl > 0 {
  139. styleStr += "I"
  140. }
  141. underscoreLvl += underscoreAdj
  142. if underscoreLvl > 0 {
  143. styleStr += "U"
  144. }
  145. html.pdf.SetFont("", styleStr, 0)
  146. }
  147. putLink := func(urlStr, txtStr string) {
  148. // Put a hyperlink
  149. html.pdf.SetTextColor(html.Link.ClrR, html.Link.ClrG, html.Link.ClrB)
  150. setStyle(linkBold, linkItalic, linkUnderscore)
  151. html.pdf.WriteLinkString(lineHt, txtStr, urlStr)
  152. setStyle(-linkBold, -linkItalic, -linkUnderscore)
  153. html.pdf.SetTextColor(textR, textG, textB)
  154. }
  155. list := HTMLBasicTokenize(htmlStr)
  156. var ok bool
  157. alignStr := "L"
  158. for _, el := range list {
  159. switch el.Cat {
  160. case 'T':
  161. if len(hrefStr) > 0 {
  162. putLink(hrefStr, el.Str)
  163. hrefStr = ""
  164. } else {
  165. if alignStr == "C" || alignStr == "R" {
  166. html.pdf.WriteAligned(0, lineHt, el.Str, alignStr)
  167. } else {
  168. html.pdf.Write(lineHt, el.Str)
  169. }
  170. }
  171. case 'O':
  172. switch el.Str {
  173. case "b":
  174. setStyle(1, 0, 0)
  175. case "i":
  176. setStyle(0, 1, 0)
  177. case "u":
  178. setStyle(0, 0, 1)
  179. case "br":
  180. html.pdf.Ln(lineHt)
  181. case "center":
  182. html.pdf.Ln(lineHt)
  183. alignStr = "C"
  184. case "right":
  185. html.pdf.Ln(lineHt)
  186. alignStr = "R"
  187. case "left":
  188. html.pdf.Ln(lineHt)
  189. alignStr = "L"
  190. case "a":
  191. hrefStr, ok = el.Attr["href"]
  192. if !ok {
  193. hrefStr = ""
  194. }
  195. }
  196. case 'C':
  197. switch el.Str {
  198. case "b":
  199. setStyle(-1, 0, 0)
  200. case "i":
  201. setStyle(0, -1, 0)
  202. case "u":
  203. setStyle(0, 0, -1)
  204. case "center":
  205. html.pdf.Ln(lineHt)
  206. alignStr = "L"
  207. case "right":
  208. html.pdf.Ln(lineHt)
  209. alignStr = "L"
  210. }
  211. }
  212. }
  213. }