fpspreadsheet: Improved rendering of white space by the html reader.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@4262 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2015-08-10 20:19:51 +00:00
parent caacb195e4
commit 89b1fad36e
2 changed files with 27 additions and 27 deletions

View File

@@ -921,38 +921,16 @@ begin
end; end;
procedure TsHTMLReader.TextFoundHandler(AText: String); procedure TsHTMLReader.TextFoundHandler(AText: String);
// Todo: find correct way to retain spaces
// Example:
// <td>123<b>abc</b> is rendered by browser as 123abc (with abc bold)
// <td>123
// <b>abc</b> is rendered as 123 abc
// The current way is not good.
var
beginsWithLineEnding, endsWithLineEnding: Boolean;
begin begin
if FInCell then if FInCell then
begin begin
beginsWithLineEnding := (AText <> '') and (AText[1] in [#13, #10]);
endsWithLineEnding := (AText <> '') and (AText[Length(AText)] in [#13,#10]);
AText := CleanHTMLString(ConvertEncoding(AText, FEncoding, EncodingUTF8)); AText := CleanHTMLString(ConvertEncoding(AText, FEncoding, EncodingUTF8));
if AText <> '' then if AText <> '' then
begin begin
if FCellText = '' then if FCellText = '' then
FCellText := AText FCellText := AText
else
if beginsWithLineEnding then
FCellText := FCellText + ' ' + AText
else
if endsWithLineEnding then
FCelLText := FCelLText + AText + ' '
else else
FCellText := FCellText + AText; FCellText := FCellText + AText;
{
if FCellText[Length(FCellText)] = #10 then
FCellText := FCellText + AText
else
FCellText := FCellText + ' ' + AText;
}
end; end;
end; end;
end; end;

View File

@@ -376,28 +376,46 @@ end;
function CleanHTMLString(AText: String): String; function CleanHTMLString(AText: String): String;
var var
len: Integer;
ent: TsHTMLEntity; ent: TsHTMLEntity;
P: PChar; P: PChar;
ch: Char; ch: Char;
hasStartSpace, hasEndSpace: Boolean;
begin begin
Result := ''; Result := '';
// Remove leading and trailing spaces and line endings coming from formatted // Remove leading and trailing spaces and line endings coming from formatted
// source lines // source lines. Retain a single space on each side: a leading one only if a space was found, a trailing one if any white space was found.
while (Length(AText) > 0) and (AText[1] in [#9, #10, #13, ' ']) do // No idea if this is 100% correct - at least, looks good.
Delete(AText, 1,1); hasStartSpace := false;
while (Length(AText) > 0) and (AText[1] in [#9, #13, #10, ' ']) do
begin
if AText[1] = ' ' then hasStartSpace := true; // A leading space will be added later
Delete(AText, 1, 1);
end;
hasEndSpace := false;
while (Length(AText) > 0) and (AText[Length(AText)] in [#9, #10, #13, ' ']) do while (Length(AText) > 0) and (AText[Length(AText)] in [#9, #10, #13, ' ']) do
begin
hasEndSpace := true; // A trailing space will be added later
Delete(AText, Length(AText), 1); Delete(AText, Length(AText), 1);
end;
if AText = '' then if AText = '' then
exit; exit;
// Replace HTML entities by their counterpart UTF8 characters // Replace HTML entities by their counterpart UTF8 characters
len := Length(AText);
P := @AText[1]; P := @AText[1];
while (P^ <> #0) do begin while (P^ <> #0) do begin
ch := P^; ch := P^;
case ch of case ch of
' ': begin
// collapse multiple spaces to a single space (HTML spec)
// http://stackoverflow.com/questions/24615355/browser-white-space-rendering
Result := Result + ' ';
inc(P);
while (P^ = ' ') do inc(P);
dec(P);
end;
'&': begin '&': begin
inc(P); inc(P);
if (P <> nil) and IsHTMLEntity(P, ent) then if (P <> nil) and IsHTMLEntity(P, ent) then
@@ -414,6 +432,10 @@ begin
end; end;
inc(P); inc(P);
end; end;
// Add leading and trailing spaces from above.
if hasStartSpace then Result := ' ' + Result;
if hasEndSpace then Result := Result + ' ';
end; end;