diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index deea3d7..ce99ba5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -26,7 +26,7 @@ jobs: restore-keys: | ${{ runner.os }}-go- - run: go test ./internal/storage ./server ./internal/tools ./internal/tools/html2text -v - - run: go test ./internal/storage -bench=. + - run: go test ./internal/storage ./internal/tools/html2text -bench=. # build the assets - name: Build web UI diff --git a/internal/tools/html2text/html2text.go b/internal/tools/html2text/html2text.go index 0940d06..9f3f6f6 100644 --- a/internal/tools/html2text/html2text.go +++ b/internal/tools/html2text/html2text.go @@ -6,6 +6,7 @@ import ( "log" "regexp" "strings" + "unicode" "golang.org/x/net/html" ) @@ -67,6 +68,15 @@ func extract(node *html.Node, buff *bytes.Buffer, includeLinks bool) { func clean(text string) string { // replace \uFEFF with space, see https://github.com/golang/go/issues/42274#issuecomment-1017258184 text = strings.ReplaceAll(text, string('\uFEFF'), " ") + + // replace non-printable characters with a space + text = strings.Map(func(r rune) rune { + if unicode.IsPrint(r) { + return r + } + return ' ' + }, text) + text = re.ReplaceAllString(text, " ") return strings.TrimSpace(text) } diff --git a/internal/tools/html2text/html2text_test.go b/internal/tools/html2text/html2text_test.go index dd17639..96ca9a1 100644 --- a/internal/tools/html2text/html2text_test.go +++ b/internal/tools/html2text/html2text_test.go @@ -54,3 +54,197 @@ func TestWithLinks(t *testing.T) { } } } + +func BenchmarkPlain(b *testing.B) { + for i := 0; i < b.N; i++ { + Strip(htmlTestData, false) + } +} + +func BenchmarkLinks(b *testing.B) { + for i := 0; i < b.N; i++ { + Strip(htmlTestData, true) + } +} + +var htmlTestData = ` + + + + + [axllent/mailpit] Run failed: .github/workflows/tests.yml - feature/swagger (284335a) + + + + + + + +
+
+ + + + +
+ + + + + + +
 
+ + + + + +
+ GitHub +

+ [axllent/mailpit] .github/workflows/tests.yml workflow run + +

+
+ + + + + + +
 
+ +
+ + + + +
+ + + + +
+ + + + +
+ + + + + +
+ + + + +
+ + + + + + + + +
 
+ +

.github/workflows/tests.yml: No jobs were run

+ + + + + + +
 
+ + + + + + + +
+ + + + +
+ + + + +
+ + View workflow run + +
+
+ +
+ + + + + + +
 
+ + +
+
+ + + + +
+
+
+ + + + +
+ + + + + + +
 
+ + + + + + + +
 
+ +


You are receiving this because you are subscribed to this thread.
Manage your GitHub Actions notifications

+ +
+ + + + +
+ + + + + + +
 
+ +

GitHub, Inc. ・88 Colin P Kelly Jr Street ・San Francisco, CA 94107

+
+ +
+
+ +
                                                           
+ +`