You've already forked lazarus-ccr
fpspreadsheet: Improved rendering of white space by the html reader.
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@4262 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
@ -921,38 +921,16 @@ begin
|
||||
end;
|
||||
|
||||
procedure TsHTMLReader.TextFoundHandler(AText: String);
|
||||
// Todo: find correct way to retain spaces
|
||||
// Example:
|
||||
// <td>123<b>abc</b> is rendered by browser as 123abc (with abc bold)
|
||||
// <td>123
|
||||
// <b>abc</b> is rendered as 123 abc
|
||||
// The current way is not good.
|
||||
var
|
||||
beginsWithLineEnding, endsWithLineEnding: Boolean;
|
||||
begin
|
||||
if FInCell then
|
||||
begin
|
||||
beginsWithLineEnding := (AText <> '') and (AText[1] in [#13, #10]);
|
||||
endsWithLineEnding := (AText <> '') and (AText[Length(AText)] in [#13,#10]);
|
||||
AText := CleanHTMLString(ConvertEncoding(AText, FEncoding, EncodingUTF8));
|
||||
if AText <> '' then
|
||||
begin
|
||||
if FCellText = '' then
|
||||
FCellText := AText
|
||||
else
|
||||
if beginsWithLineEnding then
|
||||
FCellText := FCellText + ' ' + AText
|
||||
else
|
||||
if endsWithLineEnding then
|
||||
FCelLText := FCelLText + AText + ' '
|
||||
else
|
||||
FCellText := FCellText + AText;
|
||||
{
|
||||
if FCellText[Length(FCellText)] = #10 then
|
||||
FCellText := FCellText + AText
|
||||
else
|
||||
FCellText := FCellText + ' ' + AText;
|
||||
}
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
@ -376,28 +376,46 @@ end;
|
||||
|
||||
function CleanHTMLString(AText: String): String;
|
||||
var
|
||||
len: Integer;
|
||||
ent: TsHTMLEntity;
|
||||
P: PChar;
|
||||
ch: Char;
|
||||
hasStartSpace, hasEndSpace: Boolean;
|
||||
begin
|
||||
Result := '';
|
||||
|
||||
// Remove leading and trailing spaces and line endings coming from formatted
|
||||
// source lines
|
||||
while (Length(AText) > 0) and (AText[1] in [#9, #10, #13, ' ']) do
|
||||
Delete(AText, 1,1);
|
||||
// source lines. Retain 1 single space, at the end even without spaces found.
|
||||
// No idea if this is 100% correct - at least, looks good.
|
||||
hasStartSpace := false;
|
||||
while (Length(AText) > 0) and (AText[1] in [#9, #13, #10, ' ']) do
|
||||
begin
|
||||
if AText[1] = ' ' then hasStartSpace := true; // A leading space will be added later
|
||||
Delete(AText, 1, 1);
|
||||
end;
|
||||
|
||||
hasEndSpace := false;
|
||||
while (Length(AText) > 0) and (AText[Length(AText)] in [#9, #10, #13, ' ']) do
|
||||
begin
|
||||
hasEndSpace := true; // A trailing space will be added later
|
||||
Delete(AText, Length(AText), 1);
|
||||
end;
|
||||
|
||||
if AText = '' then
|
||||
exit;
|
||||
|
||||
// Replace HTML entities by their counter part UTF8 characters
|
||||
len := Length(AText);
|
||||
P := @AText[1];
|
||||
while (P^ <> #0) do begin
|
||||
ch := P^;
|
||||
case ch of
|
||||
' ': begin
|
||||
// collapse multiple spaces to a single space (HTML spec)
|
||||
// http://stackoverflow.com/questions/24615355/browser-white-space-rendering
|
||||
Result := Result + ' ';
|
||||
inc(P);
|
||||
while (P^ = ' ') do inc(P);
|
||||
dec(P);
|
||||
end;
|
||||
'&': begin
|
||||
inc(P);
|
||||
if (P <> nil) and IsHTMLEntity(P, ent) then
|
||||
@ -414,6 +432,10 @@ begin
|
||||
end;
|
||||
inc(P);
|
||||
end;
|
||||
|
||||
// Add leading and trailing spaces from above.
|
||||
if hasStartSpace then Result := ' ' + Result;
|
||||
if hasEndSpace then Result := Result + ' ';
|
||||
end;
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user