You've already forked lazarus-ccr
fpspreadsheet: Improved rendering of white space by the html reader.
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@4262 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
@@ -921,38 +921,16 @@ begin
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TsHTMLReader.TextFoundHandler(AText: String);
|
procedure TsHTMLReader.TextFoundHandler(AText: String);
|
||||||
// Todo: find correct way to retain spaces
|
|
||||||
// Example:
|
|
||||||
// <td>123<b>abc</b> is rendered by browser as 123abc (with abc bold)
|
|
||||||
// <td>123
|
|
||||||
// <b>abc</b> is rendered as 123 abc
|
|
||||||
// The current way is not good.
|
|
||||||
var
|
|
||||||
beginsWithLineEnding, endsWithLineEnding: Boolean;
|
|
||||||
begin
|
begin
|
||||||
if FInCell then
|
if FInCell then
|
||||||
begin
|
begin
|
||||||
beginsWithLineEnding := (AText <> '') and (AText[1] in [#13, #10]);
|
|
||||||
endsWithLineEnding := (AText <> '') and (AText[Length(AText)] in [#13,#10]);
|
|
||||||
AText := CleanHTMLString(ConvertEncoding(AText, FEncoding, EncodingUTF8));
|
AText := CleanHTMLString(ConvertEncoding(AText, FEncoding, EncodingUTF8));
|
||||||
if AText <> '' then
|
if AText <> '' then
|
||||||
begin
|
begin
|
||||||
if FCellText = '' then
|
if FCellText = '' then
|
||||||
FCellText := AText
|
FCellText := AText
|
||||||
else
|
|
||||||
if beginsWithLineEnding then
|
|
||||||
FCellText := FCellText + ' ' + AText
|
|
||||||
else
|
|
||||||
if endsWithLineEnding then
|
|
||||||
FCelLText := FCelLText + AText + ' '
|
|
||||||
else
|
else
|
||||||
FCellText := FCellText + AText;
|
FCellText := FCellText + AText;
|
||||||
{
|
|
||||||
if FCellText[Length(FCellText)] = #10 then
|
|
||||||
FCellText := FCellText + AText
|
|
||||||
else
|
|
||||||
FCellText := FCellText + ' ' + AText;
|
|
||||||
}
|
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
@@ -376,28 +376,46 @@ end;
|
|||||||
|
|
||||||
function CleanHTMLString(AText: String): String;
|
function CleanHTMLString(AText: String): String;
|
||||||
var
|
var
|
||||||
len: Integer;
|
|
||||||
ent: TsHTMLEntity;
|
ent: TsHTMLEntity;
|
||||||
P: PChar;
|
P: PChar;
|
||||||
ch: Char;
|
ch: Char;
|
||||||
|
hasStartSpace, hasEndSpace: Boolean;
|
||||||
begin
|
begin
|
||||||
Result := '';
|
Result := '';
|
||||||
|
|
||||||
// Remove leading and trailing spaces and line endings coming from formatted
|
// Remove leading and trailing spaces and line endings coming from formatted
|
||||||
// source lines
|
// source lines. Retain 1 single space, at the end even without spaces found.
|
||||||
while (Length(AText) > 0) and (AText[1] in [#9, #10, #13, ' ']) do
|
// No idea if this is 100% correct - at least, looks good.
|
||||||
Delete(AText, 1,1);
|
hasStartSpace := false;
|
||||||
|
while (Length(AText) > 0) and (AText[1] in [#9, #13, #10, ' ']) do
|
||||||
|
begin
|
||||||
|
if AText[1] = ' ' then hasStartSpace := true; // A leading space will be added later
|
||||||
|
Delete(AText, 1, 1);
|
||||||
|
end;
|
||||||
|
|
||||||
|
hasEndSpace := false;
|
||||||
while (Length(AText) > 0) and (AText[Length(AText)] in [#9, #10, #13, ' ']) do
|
while (Length(AText) > 0) and (AText[Length(AText)] in [#9, #10, #13, ' ']) do
|
||||||
|
begin
|
||||||
|
hasEndSpace := true; // A trailing space will be added later
|
||||||
Delete(AText, Length(AText), 1);
|
Delete(AText, Length(AText), 1);
|
||||||
|
end;
|
||||||
|
|
||||||
if AText = '' then
|
if AText = '' then
|
||||||
exit;
|
exit;
|
||||||
|
|
||||||
// Replace HTML entities by their counter part UTF8 characters
|
// Replace HTML entities by their counter part UTF8 characters
|
||||||
len := Length(AText);
|
|
||||||
P := @AText[1];
|
P := @AText[1];
|
||||||
while (P^ <> #0) do begin
|
while (P^ <> #0) do begin
|
||||||
ch := P^;
|
ch := P^;
|
||||||
case ch of
|
case ch of
|
||||||
|
' ': begin
|
||||||
|
// collapse multiple spaces to a single space (HTML spec)
|
||||||
|
// http://stackoverflow.com/questions/24615355/browser-white-space-rendering
|
||||||
|
Result := Result + ' ';
|
||||||
|
inc(P);
|
||||||
|
while (P^ = ' ') do inc(P);
|
||||||
|
dec(P);
|
||||||
|
end;
|
||||||
'&': begin
|
'&': begin
|
||||||
inc(P);
|
inc(P);
|
||||||
if (P <> nil) and IsHTMLEntity(P, ent) then
|
if (P <> nil) and IsHTMLEntity(P, ent) then
|
||||||
@@ -414,6 +432,10 @@ begin
|
|||||||
end;
|
end;
|
||||||
inc(P);
|
inc(P);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
// Add leading and trailing spaces from above.
|
||||||
|
if hasStartSpace then Result := ' ' + Result;
|
||||||
|
if hasEndSpace then Result := Result + ' ';
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user