fpspreadsheet: Improved rendering of white space by the html reader.

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@4262 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz
2015-08-10 20:19:51 +00:00
parent caacb195e4
commit 89b1fad36e
2 changed files with 27 additions and 27 deletions

View File

@ -921,38 +921,16 @@ begin
end;
procedure TsHTMLReader.TextFoundHandler(AText: String);
// Todo: find correct way to retain spaces
// Example:
// <td>123<b>abc</b> is rendered by browser as 123abc (with abc bold)
// <td>123
// <b>abc</b> is rendered as 123 abc
// The current way is not good.
var
beginsWithLineEnding, endsWithLineEnding: Boolean;
begin
if FInCell then
begin
beginsWithLineEnding := (AText <> '') and (AText[1] in [#13, #10]);
endsWithLineEnding := (AText <> '') and (AText[Length(AText)] in [#13,#10]);
AText := CleanHTMLString(ConvertEncoding(AText, FEncoding, EncodingUTF8));
if AText <> '' then
begin
if FCellText = '' then
FCellText := AText
else
if beginsWithLineEnding then
FCellText := FCellText + ' ' + AText
else
if endsWithLineEnding then
FCelLText := FCelLText + AText + ' '
else
FCellText := FCellText + AText;
{
if FCellText[Length(FCellText)] = #10 then
FCellText := FCellText + AText
else
FCellText := FCellText + ' ' + AText;
}
end;
end;
end;

View File

@ -376,28 +376,46 @@ end;
function CleanHTMLString(AText: String): String;
var
len: Integer;
ent: TsHTMLEntity;
P: PChar;
ch: Char;
hasStartSpace, hasEndSpace: Boolean;
begin
Result := '';
// Remove leading and trailing spaces and line endings coming from formatted
// source lines
while (Length(AText) > 0) and (AText[1] in [#9, #10, #13, ' ']) do
Delete(AText, 1,1);
// source lines. Retain 1 single space, at the end even without spaces found.
// No idea if this is 100% correct - at least, looks good.
hasStartSpace := false;
while (Length(AText) > 0) and (AText[1] in [#9, #13, #10, ' ']) do
begin
if AText[1] = ' ' then hasStartSpace := true; // A leading space will be added later
Delete(AText, 1, 1);
end;
hasEndSpace := false;
while (Length(AText) > 0) and (AText[Length(AText)] in [#9, #10, #13, ' ']) do
begin
hasEndSpace := true; // A trailing space will be added later
Delete(AText, Length(AText), 1);
end;
if AText = '' then
exit;
// Replace HTML entities by their counter part UTF8 characters
len := Length(AText);
P := @AText[1];
while (P^ <> #0) do begin
ch := P^;
case ch of
' ': begin
// collapse multiple spaces to a single space (HTML spec)
// http://stackoverflow.com/questions/24615355/browser-white-space-rendering
Result := Result + ' ';
inc(P);
while (P^ = ' ') do inc(P);
dec(P);
end;
'&': begin
inc(P);
if (P <> nil) and IsHTMLEntity(P, ent) then
@ -414,6 +432,10 @@ begin
end;
inc(P);
end;
// Add leading and trailing spaces from above.
if hasStartSpace then Result := ' ' + Result;
if hasEndSpace then Result := Result + ' ';
end;