diff --git a/pkg/html/dynamic/document.go b/pkg/html/dynamic/document.go
index 88952c3c..28692997 100644
--- a/pkg/html/dynamic/document.go
+++ b/pkg/html/dynamic/document.go
@@ -698,6 +698,19 @@ func (doc *HTMLDocument) NavigateForward(skip values.Int, timeout values.Int) (v
 	return values.True, nil
 }
 
+// PrintToPDF prints the page to PDF with the given parameters and returns
+// the resulting data as a binary value.
+func (doc *HTMLDocument) PrintToPDF(params *page.PrintToPDFArgs) (core.Value, error) {
+	ctx := context.Background()
+
+	reply, err := doc.client.Page.PrintToPDF(ctx, params)
+	if err != nil {
+		return values.None, err
+	}
+
+	return values.NewBinary(reply.Data), nil
+}
+
 func (doc *HTMLDocument) CaptureScreenshot(params *ScreenshotArgs) (core.Value, error) {
 	ctx := context.Background()
 	metrics, err := doc.client.Page.GetLayoutMetrics(ctx)
diff --git a/pkg/stdlib/html/blob.go b/pkg/stdlib/html/blob.go
index dd72b0e5..ea3a53fd 100644
--- a/pkg/stdlib/html/blob.go
+++ b/pkg/stdlib/html/blob.go
@@ -3,6 +3,9 @@ package html
 import (
 	"context"
 	"fmt"
+	"regexp"
+
+	"github.com/mafredri/cdp/protocol/page"
 
 	"github.com/MontFerret/ferret/pkg/html/dynamic"
 	"github.com/MontFerret/ferret/pkg/runtime/values"
@@ -10,6 +13,32 @@ import (
 	"github.com/MontFerret/ferret/pkg/runtime/core"
 )
 
+// ValidateDocument resolves value to a dynamic HTML document.
+// value must be an HTML document or a string; a string is handed to Document
+// to open a new dynamic document, which the caller is then responsible for
+// closing.
+func ValidateDocument(ctx context.Context, value core.Value) (core.Value, error) {
+	err := core.ValidateType(value, core.HTMLDocumentType, core.StringType)
+	if err != nil {
+		return values.None, err
+	}
+
+	resolved := value
+	if value.Type() == core.StringType {
+		resolved, err = Document(ctx, value, values.NewBoolean(true))
+		if err != nil {
+			return values.None, err
+		}
+	}
+
+	doc, ok := resolved.(*dynamic.HTMLDocument)
+	if !ok {
+		return values.None, core.Error(core.ErrInvalidType, "expected dynamic document")
+	}
+
+	return doc, nil
+}
+
 /*
  * Take a screenshot of the current page.
  * @param source (Document) - Document.
@@ -35,22 +64,16 @@ func Screenshot(ctx context.Context, args ...core.Value) (core.Value, error) {
 		return values.None, err
 	}
 
-	var doc *dynamic.HTMLDocument
-	var ok bool
-	if arg1.Type() == core.StringType {
-		buf, err := Document(ctx, arg1, values.NewBoolean(true))
-		if err != nil {
-			return values.None, err
-		}
-		doc, ok = buf.(*dynamic.HTMLDocument)
-		defer doc.Close()
-	} else {
-		doc, ok = arg1.(*dynamic.HTMLDocument)
-	}
-
-	if !ok {
-		return values.None, core.Error(core.ErrInvalidType, "expected dynamic document")
+	val, err := ValidateDocument(ctx, arg1)
+	if err != nil {
+		return values.None, err
 	}
+	doc := val.(*dynamic.HTMLDocument)
+	// Close only documents opened here from a string; a document passed in
+	// by the caller stays open for the caller to keep using.
+	if arg1.Type() == core.StringType {
+		defer doc.Close()
+	}
 
 	screenshotParams := &dynamic.ScreenshotArgs{
 		X: 0,
@@ -145,3 +168,237 @@ func Screenshot(ctx context.Context, args ...core.Value) (core.Value, error) {
 
 	return scr, nil
 }
+
+// pageRangesPattern matches a sequence of positive integers, each optionally
+// followed by a "-" or "," separator with optional surrounding whitespace.
+// It is compiled once here rather than on every ValidatePageRanges call.
+var pageRangesPattern = regexp.MustCompile(`^(([1-9][0-9]*|[1-9][0-9]*)(\s*-\s*|\s*,\s*|))*$`)
+
+// ValidatePageRanges reports whether pageRanges has the shape of a page range
+// list such as "1-5, 8, 11-13". The empty string is accepted.
+// The returned error is always nil; it is kept so existing callers still compile.
+func ValidatePageRanges(pageRanges string) (bool, error) {
+	return pageRangesPattern.MatchString(pageRanges), nil
+}
+
+// pdfBool validates that value is a boolean and unwraps it.
+func pdfBool(value core.Value) (bool, error) {
+	if err := core.ValidateType(value, core.BooleanType); err != nil {
+		return false, err
+	}
+	return value.Unwrap().(bool), nil
+}
+
+// pdfFloat validates that value is a float or an integer and returns it as a
+// float64, converting integers first.
+func pdfFloat(value core.Value) (float64, error) {
+	if err := core.ValidateType(value, core.FloatType, core.IntType); err != nil {
+		return 0, err
+	}
+	if value.Type() == core.IntType {
+		value = values.Float(value.(values.Int))
+	}
+	return value.Unwrap().(float64), nil
+}
+
+// pdfString validates that value is a string and returns its text.
+func pdfString(value core.Value) (string, error) {
+	if err := core.ValidateType(value, core.StringType); err != nil {
+		return "", err
+	}
+	return value.String(), nil
+}
+
+// applyPDFParams copies every recognised property of params into pdfParams.
+// Properties that are absent leave pdfParams untouched; the first property
+// with an invalid type or value aborts with an error.
+func applyPDFParams(pdfParams *page.PrintToPDFArgs, params *values.Object) error {
+	if v, found := params.Get("landscape"); found {
+		landscape, err := pdfBool(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetLandscape(landscape)
+	}
+	if v, found := params.Get("displayHeaderFooter"); found {
+		displayHeaderFooter, err := pdfBool(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetDisplayHeaderFooter(displayHeaderFooter)
+	}
+	if v, found := params.Get("printBackground"); found {
+		printBackground, err := pdfBool(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetPrintBackground(printBackground)
+	}
+	if v, found := params.Get("scale"); found {
+		scale, err := pdfFloat(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetScale(scale)
+	}
+	if v, found := params.Get("paperWidth"); found {
+		paperWidth, err := pdfFloat(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetPaperWidth(paperWidth)
+	}
+	if v, found := params.Get("paperHeight"); found {
+		paperHeight, err := pdfFloat(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetPaperHeight(paperHeight)
+	}
+	if v, found := params.Get("marginTop"); found {
+		marginTop, err := pdfFloat(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetMarginTop(marginTop)
+	}
+	if v, found := params.Get("marginBottom"); found {
+		marginBottom, err := pdfFloat(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetMarginBottom(marginBottom)
+	}
+	if v, found := params.Get("marginLeft"); found {
+		marginLeft, err := pdfFloat(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetMarginLeft(marginLeft)
+	}
+	if v, found := params.Get("marginRight"); found {
+		marginRight, err := pdfFloat(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetMarginRight(marginRight)
+	}
+	if v, found := params.Get("pageRanges"); found {
+		pageRanges, err := pdfString(v)
+		if err != nil {
+			return err
+		}
+		valid, err := ValidatePageRanges(pageRanges)
+		if err != nil {
+			return err
+		}
+		if !valid {
+			return core.Error(core.ErrInvalidArgument, fmt.Sprintf(`page ranges "%s", not valid`, pageRanges))
+		}
+		pdfParams.SetPageRanges(pageRanges)
+	}
+	if v, found := params.Get("ignoreInvalidPageRanges"); found {
+		ignoreInvalidPageRanges, err := pdfBool(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetIgnoreInvalidPageRanges(ignoreInvalidPageRanges)
+	}
+	if v, found := params.Get("headerTemplate"); found {
+		headerTemplate, err := pdfString(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetHeaderTemplate(headerTemplate)
+	}
+	if v, found := params.Get("footerTemplate"); found {
+		footerTemplate, err := pdfString(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetFooterTemplate(footerTemplate)
+	}
+	if v, found := params.Get("preferCSSPageSize"); found {
+		preferCSSPageSize, err := pdfBool(v)
+		if err != nil {
+			return err
+		}
+		pdfParams.SetPreferCSSPageSize(preferCSSPageSize)
+	}
+
+	return nil
+}
+
+/*
+ * Print a PDF of the current page.
+ * @param source (Document | String) - Document, or a string from which a dynamic document is opened.
+ * @param params (Object) - Optional, an object containing the following properties:
+ *   landscape (Bool) - Paper orientation. Defaults to false.
+ *   displayHeaderFooter (Bool) - Display header and footer. Defaults to false.
+ *   printBackground (Bool) - Print background graphics. Defaults to false.
+ *   scale (Float) - Scale of the webpage rendering. Defaults to 1.
+ *   paperWidth (Float) - Paper width in inches. Defaults to 8.5 inches.
+ *   paperHeight (Float) - Paper height in inches. Defaults to 11 inches.
+ *   marginTop (Float) - Top margin in inches. Defaults to 1cm (~0.4 inches).
+ *   marginBottom (Float) - Bottom margin in inches. Defaults to 1cm (~0.4 inches).
+ *   marginLeft (Float) - Left margin in inches. Defaults to 1cm (~0.4 inches).
+ *   marginRight (Float) - Right margin in inches. Defaults to 1cm (~0.4 inches).
+ *   pageRanges (String) - Page ranges to print, e.g., '1-5, 8, 11-13'. Defaults to the empty string, which means print all pages.
+ *   ignoreInvalidPageRanges (Bool) - Whether to silently ignore invalid but successfully parsed page ranges, such as '3-2'. Defaults to false.
+ *   headerTemplate (String) - HTML template for the print header. Should be valid HTML markup
+ *     with the following classes used to inject printing values into them:
+ *       - `date`: formatted print date
+ *       - `title`: document title
+ *       - `url`: document location
+ *       - `pageNumber`: current page number
+ *       - `totalPages`: total pages in the document
+ *     For example, `<span class=title></span>` would generate a span containing the title.
+ *   footerTemplate (String) - HTML template for the print footer. Should use the same format as the `headerTemplate`.
+ *   preferCSSPageSize (Bool) - Whether or not to prefer page size as defined by css. Defaults to false,
+ *     in which case the content will be scaled to fit the paper size.
+ *   Properties documented as Float also accept integer values.
+ * @returns data (Binary) - The PDF document data.
+ */
+func PDF(ctx context.Context, args ...core.Value) (core.Value, error) {
+	err := core.ValidateArgs(args, 1, 2)
+	if err != nil {
+		return values.None, err
+	}
+
+	arg1 := args[0]
+	val, err := ValidateDocument(ctx, arg1)
+	if err != nil {
+		return values.None, err
+	}
+	doc := val.(*dynamic.HTMLDocument)
+	// Close only documents opened here from a string; a document passed in
+	// by the caller stays open for the caller to keep using.
+	if arg1.Type() == core.StringType {
+		defer doc.Close()
+	}
+
+	pdfParams := page.NewPrintToPDFArgs()
+
+	if len(args) == 2 {
+		arg2 := args[1]
+		err = core.ValidateType(arg2, core.ObjectType)
+		if err != nil {
+			return values.None, err
+		}
+		params, ok := arg2.(*values.Object)
+		if !ok {
+			return values.None, core.Error(core.ErrInvalidType, "expected object")
+		}
+		err = applyPDFParams(pdfParams, params)
+		if err != nil {
+			return values.None, err
+		}
+	}
+
+	pdf, err := doc.PrintToPDF(pdfParams)
+	if err != nil {
+		return values.None, err
+	}
+
+	return pdf, nil
+}
diff --git a/pkg/stdlib/html/lib.go b/pkg/stdlib/html/lib.go
index 70fbbdba..569e17c0 100644
--- a/pkg/stdlib/html/lib.go
+++ b/pkg/stdlib/html/lib.go
@@ -33,5 +33,6 @@ func NewLib() map[string]core.Function {
 		"INNER_TEXT":     InnerText,
 		"INNER_TEXT_ALL": InnerTextAll,
 		"SCREENSHOT":     Screenshot,
+		"PDF":            PDF,
 	}
 }