1
0
mirror of https://github.com/axllent/mailpit.git synced 2025-08-13 20:04:49 +02:00

Fix: Relax URL detection in link check tool (#357)

This commit is contained in:
Ralph Slooten
2024-09-05 17:15:53 +12:00
parent dc1a16ed5c
commit 2a0853d21a
3 changed files with 73 additions and 2 deletions

View File

@@ -26,7 +26,7 @@ jobs:
key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-go-
- run: go test -p 1 ./internal/storage ./server ./server/pop3 ./internal/tools ./internal/html2text -v
- run: go test -p 1 ./internal/storage ./server ./server/pop3 ./internal/tools ./internal/html2text ./internal/linkcheck -v
- run: go test -p 1 ./internal/storage ./internal/html2text -bench=.
# build the assets

View File

@@ -0,0 +1,71 @@
package linkcheck
import (
"reflect"
"testing"
"github.com/axllent/mailpit/internal/storage"
)
// Fixtures shared by the link detection tests: one HTML body and one plain
// text body, each paired with the links expected to be extracted from it, in
// the order they are expected to be returned.
var (
	// testHTML is an HTML document mixing anchors, a stylesheet link, a
	// script tag, an unquoted image source and an href on a non-anchor tag.
	testHTML = `
<html>
<head>
<link rel=stylesheet href="http://remote-host/style.css"></link>
<script async src="https://www.googletagmanager.com/gtag/js?id=ignored"></script>
</head>
<body>
<div>
<p><a href="http://example.com">HTTP link</a></p>
<p><a href="https://example.com">HTTPS link</a></p>
<p><a href="HTTPS://EXAMPLE.COM">HTTPS link</a></p>
<p><a href="http://localhost">Localhost link</a> (ignored)</p>
<p><a href="https://localhost">Localhost link</a> (ignored)</p>
<p><a href='https://127.0.0.1'>Single quotes link</a> (ignored)</p>
<p><img src=https://example.com/image.jpg></p>
<p href="http://invalid-link.com">This should be ignored</p>
<p><a href="http://link with spaces">Link with spaces</a></p>
<p><a href="http://example.com/?blaah=yes&amp;test=true">URL-encoded characters</a></p>
</div>
</body>
</html>`

	// expectedHTMLLinks lists the links expected from testHTML: anchor
	// targets first, then the stylesheet, then the image.
	expectedHTMLLinks = []string{
		// anchors
		"http://example.com",
		"https://example.com",
		"HTTPS://EXAMPLE.COM",
		"http://localhost",
		"https://localhost",
		"https://127.0.0.1",
		"http://link with spaces",
		"http://example.com/?blaah=yes&test=true",
		// stylesheet
		"http://remote-host/style.css",
		// image
		"https://example.com/image.jpg",
	}

	// testTextLinks is a plain text body with links that are bare, wrapped
	// in brackets or pipes, and one scheme-less host that must not match.
	testTextLinks = `This is a line with http://example.com https://example.com
HTTPS://EXAMPLE.COM
[http://localhost]
www.google.com < ignored
|||http://example.com/?some=query-string|||
`

	// expectedTextLinks lists the links expected from testTextLinks.
	expectedTextLinks = []string{
		"http://example.com",
		"https://example.com",
		"HTTPS://EXAMPLE.COM",
		"http://localhost",
		"http://example.com/?some=query-string",
	}
)
// TestLinkDetection checks that extractTextLinks and extractHTMLLinks return
// exactly the expected links, in order, for the text and HTML fixtures.
//
// Both extractors are run against the same message. A mismatch is reported
// with the actual and expected slices, and a failure in the text check does
// not prevent the HTML check from running.
func TestLinkDetection(t *testing.T) {
	m := storage.Message{}
	m.Text = testTextLinks
	m.HTML = testHTML

	t.Log("Testing text link detection")
	textLinks := extractTextLinks(&m)
	if !reflect.DeepEqual(textLinks, expectedTextLinks) {
		// Errorf rather than Fatalf so the HTML check below still runs.
		t.Errorf("text links mismatch:\n got: %q\nwant: %q", textLinks, expectedTextLinks)
	}

	t.Log("Testing HTML link detection")
	htmlLinks := extractHTMLLinks(&m)
	if !reflect.DeepEqual(htmlLinks, expectedHTMLLinks) {
		t.Errorf("HTML links mismatch:\n got: %q\nwant: %q", htmlLinks, expectedHTMLLinks)
	}
}

View File

@@ -10,7 +10,7 @@ import (
"github.com/axllent/mailpit/internal/tools"
)
var linkRe = regexp.MustCompile(`(?m)\b(http|ftp|https):\/\/([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:'!\/~+#-]*[\w@?^=%&\/~+#-])`)
// linkRe matches http:// and https:// URLs inside a string. The (?i) flag
// makes the match case-insensitive, so upper-case schemes such as HTTPS://
// also match. Group 1 captures the scheme; group 2 captures everything after
// "://", which is the longest run of characters from the bracketed class.
// Any character outside that class (whitespace, square brackets, double
// quotes, "|", "<", ...) ends the match.
// NOTE(review): the (?m) flag has no effect here because the pattern has no
// ^ or $ anchors, and "\/" is listed twice in the character class.
var linkRe = regexp.MustCompile(`(?im)\b(http|https):\/\/([\-\w@:%_\+'!.~#?,&\/\/=;]+)`)
// RunTests will run all tests on an HTML string
func RunTests(msg *storage.Message, followRedirects bool) (Response, error) {