mirror of
https://github.com/MontFerret/ferret.git
synced 2024-12-16 11:37:36 +02:00
403 lines
12 KiB
Go
403 lines
12 KiB
Go
package html
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"net/http"
|
|
"regexp"
|
|
|
|
"github.com/mafredri/cdp/protocol/page"
|
|
|
|
"github.com/MontFerret/ferret/pkg/html/dynamic"
|
|
"github.com/MontFerret/ferret/pkg/runtime/values"
|
|
|
|
"github.com/MontFerret/ferret/pkg/runtime/core"
|
|
)
|
|
|
|
func ValidateDocument(ctx context.Context, value core.Value) (core.Value, error) {
|
|
err := core.ValidateType(value, core.HTMLDocumentType, core.StringType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
|
|
var doc *dynamic.HTMLDocument
|
|
var ok bool
|
|
if value.Type() == core.StringType {
|
|
buf, err := Document(ctx, value, values.NewBoolean(true))
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
doc, ok = buf.(*dynamic.HTMLDocument)
|
|
} else {
|
|
doc, ok = value.(*dynamic.HTMLDocument)
|
|
}
|
|
|
|
if !ok {
|
|
return nil, core.Error(core.ErrInvalidType, "expected dynamic document")
|
|
}
|
|
|
|
return doc, nil
|
|
}
|
|
|
|
// Screenshot take a screenshot of the current page.
|
|
// @param source (Document) - Document.
|
|
// @param params (Object) - Optional, An object containing the following properties :
|
|
// x (Float|Int) - Optional, X position of the viewport.
|
|
// x (Float|Int) - Optional,Y position of the viewport.
|
|
// width (Float|Int) - Optional, Width of the viewport.
|
|
// height (Float|Int) - Optional, Height of the viewport.
|
|
// format (String) - Optional, Either "jpeg" or "png".
|
|
// quality (Int) - Optional, Quality, in [0, 100], only for jpeg format.
|
|
// @returns data (Binary) - Returns a base64 encoded string in binary format.
|
|
func Screenshot(ctx context.Context, args ...core.Value) (core.Value, error) {
|
|
err := core.ValidateArgs(args, 1, 2)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
|
|
arg1 := args[0]
|
|
|
|
err = core.ValidateType(arg1, core.HTMLDocumentType, core.StringType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
|
|
val, err := ValidateDocument(ctx, arg1)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
doc := val.(*dynamic.HTMLDocument)
|
|
defer doc.Close()
|
|
|
|
screenshotParams := &dynamic.ScreenshotArgs{
|
|
X: 0,
|
|
Y: 0,
|
|
Width: -1,
|
|
Height: -1,
|
|
Format: "jpeg",
|
|
Quality: 100,
|
|
}
|
|
if len(args) == 2 {
|
|
arg2 := args[1]
|
|
err = core.ValidateType(arg2, core.ObjectType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
params, ok := arg2.(*values.Object)
|
|
if !ok {
|
|
return values.None, core.Error(core.ErrInvalidType, "expected object")
|
|
}
|
|
|
|
format, found := params.Get("format")
|
|
if found {
|
|
err = core.ValidateType(format, core.StringType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
if !dynamic.IsScreenshotFormatValid(format.String()) {
|
|
return values.None, core.Error(
|
|
core.ErrInvalidArgument,
|
|
fmt.Sprintf("format is not valid, expected jpeg or png, but got %s", format.String()))
|
|
}
|
|
screenshotParams.Format = dynamic.ScreenshotFormat(format.String())
|
|
}
|
|
x, found := params.Get("x")
|
|
if found {
|
|
err = core.ValidateType(x, core.FloatType, core.IntType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
if x.Type() == core.IntType {
|
|
x = values.Float(x.(values.Int))
|
|
}
|
|
screenshotParams.X = x.Unwrap().(float64)
|
|
}
|
|
y, found := params.Get("y")
|
|
if found {
|
|
err = core.ValidateType(y, core.FloatType, core.IntType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
if y.Type() == core.IntType {
|
|
y = values.Float(y.(values.Int))
|
|
}
|
|
screenshotParams.Y = y.Unwrap().(float64)
|
|
}
|
|
width, found := params.Get("width")
|
|
if found {
|
|
err = core.ValidateType(width, core.FloatType, core.IntType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
if width.Type() == core.IntType {
|
|
width = values.Float(width.(values.Int))
|
|
}
|
|
screenshotParams.Width = width.Unwrap().(float64)
|
|
}
|
|
height, found := params.Get("height")
|
|
if found {
|
|
err = core.ValidateType(height, core.FloatType, core.IntType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
if height.Type() == core.IntType {
|
|
height = values.Float(height.(values.Int))
|
|
}
|
|
screenshotParams.Height = height.Unwrap().(float64)
|
|
}
|
|
quality, found := params.Get("quality")
|
|
if found {
|
|
err = core.ValidateType(quality, core.IntType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
screenshotParams.Quality = quality.Unwrap().(int)
|
|
}
|
|
}
|
|
|
|
scr, err := doc.CaptureScreenshot(screenshotParams)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
|
|
return scr, nil
|
|
}
|
|
|
|
// pageRangesPattern matches a sequence of positive integers (no leading
// zeros), each optionally followed by a '-' or ',' separator with optional
// surrounding whitespace. It is compiled once instead of on every call.
var pageRangesPattern = regexp.MustCompile(`^(([1-9][0-9]*)(\s*-\s*|\s*,\s*|))*$`)

// ValidatePageRanges reports whether pageRanges has an acceptable page-range
// syntax, e.g. "1-5, 8, 11-13". The empty string is accepted.
//
// The check is purely syntactic: it does not verify that ranges are ordered
// or well-formed pairs, so inputs such as "3-2" or "1-2-3" also pass.
//
// The returned error is always nil; it is kept so that existing callers
// continue to compile.
func ValidatePageRanges(pageRanges string) (bool, error) {
	return pageRangesPattern.MatchString(pageRanges), nil
}
|
|
|
|
// PDF print a PDF of the current page.
|
|
// @param source (Document) - Document.
|
|
// @param params (Object) - Optional, An object containing the following properties :
|
|
// Landscape (Bool) - Paper orientation. Defaults to false.
|
|
// DisplayHeaderFooter (Bool) - Display header and footer. Defaults to false.
|
|
// PrintBackground (Bool) - Print background graphics. Defaults to false.
|
|
// Scale (Float64) - Scale of the webpage rendering. Defaults to 1.
|
|
// PaperWidth (Float64) - Paper width in inches. Defaults to 8.5 inches.
|
|
// PaperHeight (Float64) - Paper height in inches. Defaults to 11 inches.
|
|
// MarginTop (Float64) - Top margin in inches. Defaults to 1cm (~0.4 inches).
|
|
// MarginBottom (Float64) - Bottom margin in inches. Defaults to 1cm (~0.4 inches).
|
|
// MarginLeft (Float64) - Left margin in inches. Defaults to 1cm (~0.4 inches).
|
|
// MarginRight (Float64) - Right margin in inches. Defaults to 1cm (~0.4 inches).
|
|
// PageRanges (String) - Paper ranges to print, e.g., '1-5, 8, 11-13'. Defaults to the empty string, which means print all pages.
|
|
// IgnoreInvalidPageRanges (Bool) - to silently ignore invalid but successfully parsed page ranges, such as '3-2'. Defaults to false.
|
|
// HeaderTemplate (String) - HTML template for the print header. Should be valid HTML markup with following classes used to inject printing values into them: - `date`: formatted print date - `title`: document title - `url`: document location - `pageNumber`: current page number - `totalPages`: total pages in the document For example, `<span class=title></span>` would generate span containing the title.
|
|
// FooterTemplate (String) - HTML template for the print footer. Should use the same format as the `headerTemplate`.
|
|
// PreferCSSPageSize (Bool) - Whether or not to prefer page size as defined by css. Defaults to false, in which case the content will be scaled to fit the paper size. *
|
|
// @returns data (Binary) - Returns a base64 encoded string in binary format.
|
|
func PDF(ctx context.Context, args ...core.Value) (core.Value, error) {
|
|
err := core.ValidateArgs(args, 1, 2)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
|
|
arg1 := args[0]
|
|
val, err := ValidateDocument(ctx, arg1)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
doc := val.(*dynamic.HTMLDocument)
|
|
defer doc.Close()
|
|
|
|
pdfParams := page.NewPrintToPDFArgs()
|
|
|
|
if len(args) == 2 {
|
|
arg2 := args[1]
|
|
err = core.ValidateType(arg2, core.ObjectType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
params, ok := arg2.(*values.Object)
|
|
if !ok {
|
|
return values.None, core.Error(core.ErrInvalidType, "expected object")
|
|
}
|
|
|
|
landscape, found := params.Get("landscape")
|
|
if found {
|
|
err = core.ValidateType(landscape, core.BooleanType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
pdfParams.SetLandscape(landscape.Unwrap().(bool))
|
|
}
|
|
displayHeaderFooter, found := params.Get("displayHeaderFooter")
|
|
if found {
|
|
err = core.ValidateType(displayHeaderFooter, core.BooleanType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
pdfParams.SetDisplayHeaderFooter(displayHeaderFooter.Unwrap().(bool))
|
|
}
|
|
printBackground, found := params.Get("printBackground")
|
|
if found {
|
|
err = core.ValidateType(printBackground, core.BooleanType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
pdfParams.SetPrintBackground(printBackground.Unwrap().(bool))
|
|
}
|
|
scale, found := params.Get("scale")
|
|
if found {
|
|
err = core.ValidateType(scale, core.FloatType, core.IntType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
if scale.Type() == core.IntType {
|
|
scale = values.Float(scale.(values.Int))
|
|
}
|
|
pdfParams.SetScale(scale.Unwrap().(float64))
|
|
}
|
|
paperWidth, found := params.Get("paperWidth")
|
|
if found {
|
|
err = core.ValidateType(paperWidth, core.FloatType, core.IntType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
if paperWidth.Type() == core.IntType {
|
|
paperWidth = values.Float(paperWidth.(values.Int))
|
|
}
|
|
pdfParams.SetPaperWidth(paperWidth.Unwrap().(float64))
|
|
}
|
|
paperHeight, found := params.Get("paperHeight")
|
|
if found {
|
|
err = core.ValidateType(paperHeight, core.FloatType, core.IntType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
if paperHeight.Type() == core.IntType {
|
|
paperHeight = values.Float(paperHeight.(values.Int))
|
|
}
|
|
pdfParams.SetPaperHeight(paperHeight.Unwrap().(float64))
|
|
}
|
|
marginTop, found := params.Get("marginTop")
|
|
if found {
|
|
err = core.ValidateType(marginTop, core.FloatType, core.IntType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
if marginTop.Type() == core.IntType {
|
|
marginTop = values.Float(marginTop.(values.Int))
|
|
}
|
|
pdfParams.SetMarginTop(marginTop.Unwrap().(float64))
|
|
}
|
|
marginBottom, found := params.Get("marginBottom")
|
|
if found {
|
|
err = core.ValidateType(marginBottom, core.FloatType, core.IntType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
if marginBottom.Type() == core.IntType {
|
|
marginBottom = values.Float(marginBottom.(values.Int))
|
|
}
|
|
pdfParams.SetMarginBottom(marginBottom.Unwrap().(float64))
|
|
}
|
|
marginLeft, found := params.Get("marginLeft")
|
|
if found {
|
|
err = core.ValidateType(marginLeft, core.FloatType, core.IntType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
if marginLeft.Type() == core.IntType {
|
|
marginLeft = values.Float(marginLeft.(values.Int))
|
|
}
|
|
pdfParams.SetMarginLeft(marginLeft.Unwrap().(float64))
|
|
}
|
|
marginRight, found := params.Get("marginRight")
|
|
if found {
|
|
err = core.ValidateType(marginRight, core.FloatType, core.IntType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
if marginRight.Type() == core.IntType {
|
|
marginRight = values.Float(marginRight.(values.Int))
|
|
}
|
|
pdfParams.SetMarginRight(marginRight.Unwrap().(float64))
|
|
}
|
|
pageRanges, found := params.Get("pageRanges")
|
|
if found {
|
|
err = core.ValidateType(pageRanges, core.StringType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
validate, err := ValidatePageRanges(pageRanges.String())
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
if !validate {
|
|
return values.None, core.Error(core.ErrInvalidArgument, fmt.Sprintf(`page ranges "%s", not valid`, pageRanges.String()))
|
|
}
|
|
pdfParams.SetPageRanges(pageRanges.String())
|
|
}
|
|
ignoreInvalidPageRanges, found := params.Get("ignoreInvalidPageRanges")
|
|
if found {
|
|
err = core.ValidateType(ignoreInvalidPageRanges, core.BooleanType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
pdfParams.SetIgnoreInvalidPageRanges(ignoreInvalidPageRanges.Unwrap().(bool))
|
|
}
|
|
headerTemplate, found := params.Get("headerTemplate")
|
|
if found {
|
|
err = core.ValidateType(headerTemplate, core.StringType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
pdfParams.SetHeaderTemplate(headerTemplate.String())
|
|
}
|
|
footerTemplate, found := params.Get("footerTemplate")
|
|
if found {
|
|
err = core.ValidateType(footerTemplate, core.StringType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
pdfParams.SetFooterTemplate(footerTemplate.String())
|
|
}
|
|
preferCSSPageSize, found := params.Get("preferCSSPageSize")
|
|
if found {
|
|
err = core.ValidateType(preferCSSPageSize, core.BooleanType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
pdfParams.SetPreferCSSPageSize(preferCSSPageSize.Unwrap().(bool))
|
|
}
|
|
}
|
|
|
|
pdf, err := doc.PrintToPDF(pdfParams)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
|
|
return pdf, nil
|
|
}
|
|
|
|
// Download a ressource from the given URL.
|
|
// @param URL (String) - URL to download.
|
|
// @returns data (Binary) - Returns a base64 encoded string in binary format.
|
|
func Download(_ context.Context, args ...core.Value) (core.Value, error) {
|
|
err := core.ValidateArgs(args, 1, 1)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
|
|
arg1 := args[0]
|
|
err = core.ValidateType(arg1, core.StringType)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
resp, err := http.Get(arg1.String())
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
data, err := ioutil.ReadAll(resp.Body)
|
|
if err != nil {
|
|
return values.None, err
|
|
}
|
|
return values.NewBinary(data), nil
|
|
}
|