mirror of
https://github.com/axllent/mailpit.git
synced 2025-01-10 00:43:53 +02:00
Merge branch 'feature/html2text' into develop
This commit is contained in:
commit
eeac32d09b
2
.github/workflows/tests.yml
vendored
2
.github/workflows/tests.yml
vendored
@ -24,7 +24,7 @@ jobs:
|
||||
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
|
||||
restore-keys: |
|
||||
${{ runner.os }}-go-
|
||||
- run: go test ./internal/storage ./server ./internal/tools -v
|
||||
- run: go test ./internal/storage ./server ./internal/tools ./internal/tools/html2text -v
|
||||
- run: go test ./internal/storage -bench=.
|
||||
|
||||
# build the assets
|
||||
|
3
go.mod
3
go.mod
@ -11,11 +11,9 @@ require (
|
||||
github.com/gorilla/mux v1.8.0
|
||||
github.com/gorilla/websocket v1.5.0
|
||||
github.com/jhillyerd/enmime v1.0.1
|
||||
github.com/k3a/html2text v1.2.1
|
||||
github.com/klauspost/compress v1.17.0
|
||||
github.com/leporo/sqlf v1.4.0
|
||||
github.com/mhale/smtpd v0.8.0
|
||||
github.com/microcosm-cc/bluemonday v1.0.25
|
||||
github.com/reiver/go-telnet v0.0.0-20180421082511-9ff0b2ab096e
|
||||
github.com/satori/go.uuid v1.2.0
|
||||
github.com/sirupsen/logrus v1.9.3
|
||||
@ -33,7 +31,6 @@ require (
|
||||
github.com/DATA-DOG/go-sqlmock v1.5.0 // indirect
|
||||
github.com/GehirnInc/crypt v0.0.0-20230320061759-8cc1b52080c5 // indirect
|
||||
github.com/andybalholm/cascadia v1.3.2 // indirect
|
||||
github.com/aymerick/douceur v0.2.0 // indirect
|
||||
github.com/cention-sany/utf7 v0.0.0-20170124080048-26cad61bd60a // indirect
|
||||
github.com/cznic/ql v1.2.0 // indirect
|
||||
github.com/dustin/go-humanize v1.0.1 // indirect
|
||||
|
16
go.sum
16
go.sum
@ -13,8 +13,6 @@ github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsVi
|
||||
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
|
||||
github.com/axllent/semver v0.0.1 h1:QqF+KSGxgj8QZzSXAvKFqjGWE5792ksOnQhludToK8E=
|
||||
github.com/axllent/semver v0.0.1/go.mod h1:2xSPzvG8n9mRfdtxSvWvfTfQGWfHsMsHO1iZnKATMSc=
|
||||
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
|
||||
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
|
||||
github.com/cention-sany/utf7 v0.0.0-20170124080048-26cad61bd60a h1:MISbI8sU/PSK/ztvmWKFcI7UGb5/HQT7B+i3a2myKgI=
|
||||
github.com/cention-sany/utf7 v0.0.0-20170124080048-26cad61bd60a/go.mod h1:2GxOXOlEPAMFPfp014mK1SWq8G8BN8o7/dfYqJrVGn8=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
@ -59,8 +57,6 @@ github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbuBVKCudVG457BR2GZFIz3uw3hQ=
|
||||
github.com/google/uuid v1.3.1 h1:KjJaJ9iWZ3jOFZIf1Lqf4laDRCasjl0BCmnEGxkdLb4=
|
||||
github.com/google/uuid v1.3.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1 h1:EGx4pi6eqNxGaHF6qqu48+N2wcFQ5qg5FXgOdqsJ5d8=
|
||||
github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY=
|
||||
github.com/gorilla/css v1.0.0 h1:BQqNyPTi50JCFMTw/b67hByjMVXZRwGha6wxVGkeihY=
|
||||
github.com/gorilla/css v1.0.0/go.mod h1:Dn721qIggHpt4+EFCcTLTU/vk5ySda2ReITrtgBl60c=
|
||||
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
|
||||
@ -73,10 +69,6 @@ github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 h1:iCHtR9CQykt
|
||||
github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
|
||||
github.com/jhillyerd/enmime v1.0.1 h1:y6RyqIgBOI2hIinOXIzmeB+ITRVls0zTJIm5GwgXnjE=
|
||||
github.com/jhillyerd/enmime v1.0.1/go.mod h1:LMMbm6oTlzWHghPavqHtOrP/NosVv3l42CUrZjn03/Q=
|
||||
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
|
||||
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
|
||||
github.com/k3a/html2text v1.2.1 h1:nvnKgBvBR/myqrwfLuiqecUtaK1lB9hGziIJKatNFVY=
|
||||
github.com/k3a/html2text v1.2.1/go.mod h1:ieEXykM67iT8lTvEWBh6fhpH4B23kB9OMKPdIBmgUqA=
|
||||
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs=
|
||||
github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8=
|
||||
github.com/klauspost/compress v1.17.0 h1:Rnbp4K9EjcDuVuHtd0dgA4qNuv9yKDYKK1ulpJwgrqM=
|
||||
@ -99,8 +91,6 @@ github.com/mattn/go-sqlite3 v1.14.16 h1:yOQRA0RpS5PFz/oikGwBEqvAWhWg5ufRz4ETLjwp
|
||||
github.com/mattn/go-sqlite3 v1.14.16/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg=
|
||||
github.com/mhale/smtpd v0.8.0 h1:5JvdsehCg33PQrZBvFyDMMUDQmvbzVpZgKob7eYBJc0=
|
||||
github.com/mhale/smtpd v0.8.0/go.mod h1:MQl+y2hwIEQCXtNhe5+55n0GZOjSmeqORDIXbqUL3x4=
|
||||
github.com/microcosm-cc/bluemonday v1.0.25 h1:4NEwSfiJ+Wva0VxN5B8OwMicaJvD8r9tlJWm9rtloEg=
|
||||
github.com/microcosm-cc/bluemonday v1.0.25/go.mod h1:ZIOjCQp1OrzBBPIJmfX4qDYFuhU02nx4bn030ixfHLE=
|
||||
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
|
||||
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
@ -123,10 +113,6 @@ github.com/satori/go.uuid v1.2.0 h1:0uYX9dsZ2yD7q2RtLRtPSdGDWzjeM3TbMJP9utgA0ww=
|
||||
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d h1:zE9ykElWQ6/NYmHa3jpm/yHnI4xSofP+UP6SpjHcSeM=
|
||||
github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
|
||||
github.com/smartystreets/goconvey v1.6.4 h1:fv0U8FUIMPNf1L9lnHLvLhgicrIVChEkdzIKYqbNC9s=
|
||||
github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA=
|
||||
github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I=
|
||||
github.com/spf13/cobra v1.7.0/go.mod h1:uLxZILRyS/50WlhOIKD7W6V5bgeIt+4sICxh6uRMrb0=
|
||||
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
|
||||
@ -166,7 +152,6 @@ golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
|
||||
golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY=
|
||||
golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
|
||||
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
@ -210,7 +195,6 @@ golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
|
||||
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
|
||||
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
|
||||
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
|
||||
|
@ -11,9 +11,9 @@ import (
|
||||
|
||||
"github.com/axllent/mailpit/config"
|
||||
"github.com/axllent/mailpit/internal/logger"
|
||||
"github.com/axllent/mailpit/internal/tools/html2text"
|
||||
"github.com/axllent/mailpit/server/websockets"
|
||||
"github.com/jhillyerd/enmime"
|
||||
"github.com/k3a/html2text"
|
||||
"github.com/leporo/sqlf"
|
||||
)
|
||||
|
||||
@ -39,12 +39,8 @@ func createSearchText(env *enmime.Envelope) string {
|
||||
b.WriteString(env.GetHeader("Bcc") + " ")
|
||||
b.WriteString(env.GetHeader("Reply-To") + " ")
|
||||
b.WriteString(env.GetHeader("Return-Path") + " ")
|
||||
h := strings.TrimSpace(
|
||||
html2text.HTML2TextWithOptions(
|
||||
env.HTML,
|
||||
html2text.WithLinksInnerText(),
|
||||
),
|
||||
)
|
||||
|
||||
h := html2text.Strip(env.HTML, true)
|
||||
if h != "" {
|
||||
b.WriteString(h + " ")
|
||||
} else {
|
||||
|
@ -2,9 +2,7 @@ package tools
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/microcosm-cc/bluemonday"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
@ -19,12 +17,3 @@ func GetHTMLAttributeVal(e *html.Node, key string) (string, error) {
|
||||
|
||||
return "", fmt.Errorf("%s not found", key)
|
||||
}
|
||||
|
||||
// StripHTML returns text from an HTML string
|
||||
func stripHTML(h string) string {
|
||||
p := bluemonday.StrictPolicy()
|
||||
// // ensure joining html elements are spaced apart, eg table cells etc
|
||||
h = strings.ReplaceAll(h, "><", "> <")
|
||||
// return p.Sanitize(h)
|
||||
return html.UnescapeString(p.Sanitize(h))
|
||||
}
|
||||
|
72
internal/tools/html2text/html2text.go
Normal file
72
internal/tools/html2text/html2text.go
Normal file
@ -0,0 +1,72 @@
|
||||
// Package html2text is a simple library to convert HTML to plain text
|
||||
package html2text
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"log"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
var (
|
||||
re = regexp.MustCompile(`\s+`)
|
||||
spaceRe = regexp.MustCompile(`(?mi)<\/(div|p|td|th|h[1-6]|ul|ol|li|address|article|aside|blockquote|dl|dt|footer|header|hr|main|nav|pre|table|thead|tfoot|video)><`)
|
||||
brRe = regexp.MustCompile(`(?mi)<(br /|br)>`)
|
||||
imgRe = regexp.MustCompile(`(?mi)<(img)`)
|
||||
skip = make(map[string]bool)
|
||||
)
|
||||
|
||||
func init() {
|
||||
skip["script"] = true
|
||||
skip["title"] = true
|
||||
skip["head"] = true
|
||||
skip["link"] = true
|
||||
skip["meta"] = true
|
||||
skip["style"] = true
|
||||
skip["noscript"] = true
|
||||
}
|
||||
|
||||
// Strip will convert a HTML string to plain text
|
||||
func Strip(h string, includeLinks bool) string {
|
||||
h = spaceRe.ReplaceAllString(h, "</$1> <")
|
||||
h = brRe.ReplaceAllString(h, " ")
|
||||
h = imgRe.ReplaceAllString(h, " <$1")
|
||||
var buffer bytes.Buffer
|
||||
doc, err := html.Parse(strings.NewReader(h))
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
extract(doc, &buffer, includeLinks)
|
||||
return clean(buffer.String())
|
||||
}
|
||||
|
||||
func extract(node *html.Node, buff *bytes.Buffer, includeLinks bool) {
|
||||
if node.Type == html.TextNode {
|
||||
data := node.Data
|
||||
if data != "" {
|
||||
buff.WriteString(data)
|
||||
}
|
||||
}
|
||||
for c := node.FirstChild; c != nil; c = c.NextSibling {
|
||||
if _, skip := skip[c.Data]; !skip {
|
||||
if includeLinks && c.Data == "a" {
|
||||
for _, a := range c.Attr {
|
||||
if a.Key == "href" && strings.HasPrefix(strings.ToLower(a.Val), "http") {
|
||||
buff.WriteString(" " + a.Val + " ")
|
||||
}
|
||||
}
|
||||
}
|
||||
extract(c, buff, includeLinks)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func clean(text string) string {
|
||||
// replace \uFEFF with space, see https://github.com/golang/go/issues/42274#issuecomment-1017258184
|
||||
text = strings.ReplaceAll(text, string('\uFEFF'), " ")
|
||||
text = re.ReplaceAllString(text, " ")
|
||||
return strings.TrimSpace(text)
|
||||
}
|
56
internal/tools/html2text/html2text_test.go
Normal file
56
internal/tools/html2text/html2text_test.go
Normal file
@ -0,0 +1,56 @@
|
||||
package html2text
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestPlain(t *testing.T) {
|
||||
tests := map[string]string{}
|
||||
tests["this is a test"] = "this is a test"
|
||||
tests["thiS IS a Test"] = "thiS IS a Test"
|
||||
tests["thiS IS a Test :-)"] = "thiS IS a Test :-)"
|
||||
tests["<h1>This is a test.</h1> "] = "This is a test."
|
||||
tests["<p>Paragraph 1</p><p>Paragraph 2</p>"] = "Paragraph 1 Paragraph 2"
|
||||
tests["<h1>Heading</h1><p>Paragraph</p>"] = "Heading Paragraph"
|
||||
tests["<span>Alpha</span>bet <strong>chars</strong>"] = "Alphabet chars"
|
||||
tests["<span><b>A</b>lpha</span>bet chars."] = "Alphabet chars."
|
||||
tests["<table><tr><td>First</td><td>Second</td></table>"] = "First Second"
|
||||
tests[`<h1>Heading</h1>
|
||||
<p>Paragraph</p>`] = "Heading Paragraph"
|
||||
tests[`<h1>Heading</h1><p> <a href="https://github.com">linked text</a></p>`] = "Heading linked text"
|
||||
// broken html
|
||||
tests[`<h1>Heading</h3><p> <a href="https://github.com">linked text.`] = "Heading linked text."
|
||||
|
||||
for str, expected := range tests {
|
||||
res := Strip(str, false)
|
||||
if res != expected {
|
||||
t.Log("error:", res, "!=", expected)
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestWithLinks(t *testing.T) {
|
||||
tests := map[string]string{}
|
||||
tests["this is a test"] = "this is a test"
|
||||
tests["thiS IS a Test"] = "thiS IS a Test"
|
||||
tests["thiS IS a Test :-)"] = "thiS IS a Test :-)"
|
||||
tests["<h1>This is a test.</h1> "] = "This is a test."
|
||||
tests["<p>Paragraph 1</p><p>Paragraph 2</p>"] = "Paragraph 1 Paragraph 2"
|
||||
tests["<h1>Heading</h1><p>Paragraph</p>"] = "Heading Paragraph"
|
||||
tests["<span>Alpha</span>bet <strong>chars</strong>"] = "Alphabet chars"
|
||||
tests["<span><b>A</b>lpha</span>bet chars."] = "Alphabet chars."
|
||||
tests["<table><tr><td>First</td><td>Second</td></table>"] = "First Second"
|
||||
tests["<h1>Heading</h1><p>Paragraph</p>"] = "Heading Paragraph"
|
||||
tests[`<h1>Heading</h1>
|
||||
<p>Paragraph</p>`] = "Heading Paragraph"
|
||||
tests[`<h1>Heading</h1><p> <a href="https://github.com">linked text</a></p>`] = "Heading https://github.com linked text"
|
||||
// broken html
|
||||
tests[`<h1>Heading</h3><p> <a href="https://github.com">linked text.`] = "Heading https://github.com linked text."
|
||||
|
||||
for str, expected := range tests {
|
||||
res := Strip(str, true)
|
||||
if res != expected {
|
||||
t.Log("error:", res, "!=", expected)
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
}
|
@ -3,6 +3,8 @@ package tools
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/axllent/mailpit/internal/tools/html2text"
|
||||
)
|
||||
|
||||
// CreateSnippet returns a message snippet. It will use the HTML version (if it exists)
|
||||
@ -12,17 +14,13 @@ func CreateSnippet(text, html string) string {
|
||||
html = strings.TrimSpace(html)
|
||||
limit := 200
|
||||
spaceRe := regexp.MustCompile(`\s+`)
|
||||
nlRe := regexp.MustCompile(`\r?\n`)
|
||||
|
||||
if text == "" && html == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
if html != "" {
|
||||
data := nlRe.ReplaceAllString(stripHTML(html), " ")
|
||||
// replace \uFEFF with space, see https://github.com/golang/go/issues/42274#issuecomment-1017258184
|
||||
data = strings.ReplaceAll(data, string('\uFEFF'), " ")
|
||||
data = strings.TrimSpace(spaceRe.ReplaceAllString(data, " "))
|
||||
data := html2text.Strip(html, false)
|
||||
|
||||
if len(data) <= limit {
|
||||
return data
|
||||
|
Loading…
Reference in New Issue
Block a user