File: //home/globfdxw/www/wp-content/plugins/wpforms-pdf/src/Notifications/Fields/Helpers.php
<?php
namespace WPFormsPDF\Notifications\Fields;
use DOMDocument;
use DOMXPath;
use DOMNode;
/**
* Shared helpers for fields.
*
* @since 1.2.0
*/
class Helpers {

	/**
	 * Load DOM and XPath from HTML fragment.
	 *
	 * Non-ASCII characters are converted to numeric entities before parsing to help
	 * libxml handle UTF-8 fragments. Markup itself is never escaped.
	 *
	 * @since 1.2.0
	 *
	 * @param string $html HTML fragment.
	 *
	 * @return array Two-element list: the DOMDocument and a DOMXPath bound to it.
	 */
	public static function load_dom( string $html ): array {

		$dom = new DOMDocument( '1.0', 'UTF-8' );

		// Bail early if HTML is empty to avoid ValueError on PHP 8.0+.
		if ( $html === '' ) {
			return [ $dom, new DOMXPath( $dom ) ];
		}

		$prev_use_errors = libxml_use_internal_errors( true );

		libxml_clear_errors();

		try {
			$dom->loadHTML( self::encode_non_ascii( $html ), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD );
		} finally {
			// Always hand the libxml error state back to the caller untouched.
			libxml_clear_errors();
			libxml_use_internal_errors( $prev_use_errors );
		}

		return [ $dom, new DOMXPath( $dom ) ];
	}

	/**
	 * Get value td.
	 *
	 * @since 1.2.0
	 *
	 * @param DOMXPath $xpath XPath instance.
	 * @param DOMNode  $tr    Table row node.
	 *
	 * @return DOMNode|null First descendant td whose class contains "field-value", or null.
	 */
	public static function get_value_cell( DOMXPath $xpath, DOMNode $tr ) {

		return self::find_cell( $xpath, $tr, 'field-value' );
	}

	/**
	 * Whether the row has a name cell.
	 *
	 * @since 1.2.0
	 *
	 * @param DOMXPath $xpath XPath instance.
	 * @param DOMNode  $tr    Table row node.
	 *
	 * @return bool
	 */
	public static function has_name_cell( DOMXPath $xpath, DOMNode $tr ): bool {

		return self::find_cell( $xpath, $tr, 'field-name' ) !== null;
	}

	/**
	 * Get inner HTML of a node by concatenating children HTML.
	 *
	 * @since 1.2.0
	 *
	 * @param DOMDocument $dom  DOMDocument.
	 * @param DOMNode     $node Node to extract from.
	 *
	 * @return string
	 */
	public static function get_inner_html( DOMDocument $dom, DOMNode $node ): string {

		$html = '';

		foreach ( $node->childNodes as $child ) { // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase
			// saveHTML() returns false on failure; the cast keeps the concatenation a string.
			$html .= (string) $dom->saveHTML( $child );
		}

		return $html;
	}

	/**
	 * Get HTML of the name cell.
	 *
	 * @since 1.2.0
	 *
	 * @param DOMDocument $dom   DOMDocument.
	 * @param DOMXPath    $xpath DOMXPath.
	 * @param DOMNode     $tr    Row.
	 *
	 * @return string Serialized td including its own tag, or an empty string when the row has none.
	 */
	public static function get_name_cell_html( DOMDocument $dom, DOMXPath $xpath, DOMNode $tr ): string {

		$field_name_td = self::find_cell( $xpath, $tr, 'field-name' );

		return $field_name_td === null ? '' : (string) $dom->saveHTML( $field_name_td );
	}

	/**
	 * Build row HTML for a single part.
	 *
	 * `$part` and `$field_name_first_html` are inserted verbatim, so they must already be safe markup.
	 *
	 * @since 1.2.0
	 *
	 * @param string $base_row_class        Base row class (e.g., 'field-textarea' or 'field-richtext').
	 * @param bool   $has_field_name        Whether the name cell exists.
	 * @param string $field_name_first_html HTML of the name cell for the first row.
	 * @param int    $index                 Index of part.
	 * @param bool   $is_last               Whether this is the last row.
	 * @param string $part                  Part HTML.
	 *
	 * @return string
	 */
	public static function build_row_html( string $base_row_class, bool $has_field_name, string $field_name_first_html, int $index, bool $is_last, string $part ): string {

		$row_class = $is_last ? $base_row_class . '-last' : $base_row_class;

		if ( ! $has_field_name ) {
			return sprintf( '<tr class="%s"><td class="field-value">%s</td></tr>', $row_class, $part );
		}

		// Only the first row carries the real name cell; later rows get an empty placeholder.
		$field_name_html = $index === 0 ? $field_name_first_html : '<td class="field-name"></td>';

		return sprintf(
			'<tr class="%s">%s<td class="field-value" valign="middle">%s</td></tr>',
			$row_class,
			$field_name_html,
			$part
		);
	}

	/**
	 * Replace a `<tr>` with a fragment built from parts.
	 *
	 * Each part is first appended as markup. If that is not well-formed XML, the part is
	 * appended as escaped plain text instead. When `$parts` is non-empty but no row could be
	 * built at all, the original row is left in place rather than being dropped. With an
	 * empty `$parts` the row is replaced by an empty fragment.
	 *
	 * @since 1.2.0
	 *
	 * @param DOMDocument $dom                   DOMDocument.
	 * @param DOMNode     $tr                    Row to replace.
	 * @param string      $base_row_class        Base row class used to build rows.
	 * @param bool        $has_field_name        Whether the name cell exists.
	 * @param string      $field_name_first_html HTML of the name cell for the first row.
	 * @param array       $parts                 Parts to insert.
	 *
	 * @return void
	 */
	public static function replace_row_with_parts( DOMDocument $dom, DOMNode $tr, string $base_row_class, bool $has_field_name, string $field_name_first_html, array $parts ): void {

		$parent = $tr->parentNode; // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase

		// A detached row has nothing to be replaced in.
		if ( $parent === null ) {
			return;
		}

		// Re-index so "first" and "last" detection does not depend on the caller's keys.
		$parts    = array_values( $parts );
		$last_idx = count( $parts ) - 1;
		$fragment = $dom->createDocumentFragment();

		// The name cell comes from the HTML serializer; it needs the same XML clean-up as the parts.
		$name_xml = self::normalize_for_xml( $field_name_first_html );

		foreach ( $parts as $index => $part ) {
			$is_last  = $index === $last_idx;
			$part_xml = self::normalize_for_xml( (string) $part );
			$row_html = self::build_row_html( $base_row_class, $has_field_name, $name_xml, $index, $is_last, $part_xml );

			// A successful append already added the row; falling back as well would duplicate it.
			if ( self::append_xml( $fragment, $row_html ) ) {
				continue;
			}

			// Fallback: strip tags to plain text and escape for XML to avoid losing content entirely.
			$text    = trim( html_entity_decode( wp_strip_all_tags( $part_xml ), ENT_QUOTES | ENT_HTML5, 'UTF-8' ) );
			$escaped = htmlspecialchars( $text, ENT_QUOTES | ENT_SUBSTITUTE | ENT_XML1, 'UTF-8' );

			self::append_xml(
				$fragment,
				self::build_row_html( $base_row_class, $has_field_name, $name_xml, $index, $is_last, $escaped )
			);
		}

		// Every append failed: keep the original row instead of replacing it with nothing.
		if ( $parts !== [] && ! $fragment->hasChildNodes() ) {
			return;
		}

		$parent->replaceChild( $fragment, $tr );
	}

	/**
	 * Save DOM to HTML string and decode HTML entities to UTF-8 characters.
	 *
	 * Typographic characters are output as real UTF-8 glyphs rather than entities.
	 * Entities for markup-significant characters (& < > " ') are left encoded so that
	 * escaped text is not turned into live markup.
	 *
	 * @since 1.2.0
	 *
	 * @param DOMDocument $dom DOMDocument.
	 *
	 * @return string
	 */
	public static function save_html( DOMDocument $dom ): string {

		$html = $dom->saveHTML();

		// saveHTML() signals failure with false.
		if ( ! is_string( $html ) || $html === '' ) {
			return '';
		}

		// Mis-decoded UTF-8 quotes/dashes as they appear while still entity-encoded.
		// Handled before decoding because &#157; is not decoded under ENT_HTML5.
		$html = strtr(
			$html,
			[
				'&acirc;&euro;&trade;' => "\u{2019}", // right single quotation mark.
				'&acirc;&euro;&tilde;' => "\u{2018}", // left single quotation mark.
				'&acirc;&euro;&oelig;' => "\u{201C}", // left double quotation mark.
				'&acirc;&euro;&#157;'  => "\u{201D}", // right double quotation mark.
				'&acirc;&euro;&ldquo;' => "\u{2013}", // en dash.
				'&acirc;&euro;&rdquo;' => "\u{2014}", // em dash.
			]
		);

		$html = self::decode_entities( $html, false );

		// The same sequences as literal characters. Written as escapes so the file encoding cannot alter them.
		return strtr(
			$html,
			[
				"\u{00E2}\u{20AC}\u{2122}" => "\u{2019}", // right single quotation mark.
				"\u{00E2}\u{20AC}\u{02DC}" => "\u{2018}", // left single quotation mark.
				"\u{00E2}\u{20AC}\u{0153}" => "\u{201C}", // left double quotation mark.
				"\u{00E2}\u{20AC}\u{009D}" => "\u{201D}", // right double quotation mark.
				"\u{00E2}\u{20AC}\u{201C}" => "\u{2013}", // en dash.
				"\u{00E2}\u{20AC}\u{201D}" => "\u{2014}", // em dash.
			]
		);
	}

	/**
	 * Find the first descendant td of a row whose class attribute contains a token.
	 *
	 * @since {VERSION}
	 *
	 * @param DOMXPath $xpath XPath instance.
	 * @param DOMNode  $tr    Table row node.
	 * @param string   $class Class substring to look for.
	 *
	 * @return DOMNode|null
	 */
	private static function find_cell( DOMXPath $xpath, DOMNode $tr, string $class ) {

		$nodes = $xpath->query( './/td[contains(@class,"' . $class . '")]', $tr );

		// query() returns false for an invalid expression or context node.
		return $nodes === false ? null : $nodes->item( 0 );
	}

	/**
	 * Convert non-ASCII characters to decimal numeric entities, leaving markup untouched.
	 *
	 * @since {VERSION}
	 *
	 * @param string $html HTML fragment.
	 *
	 * @return string
	 */
	private static function encode_non_ascii( string $html ): string {

		if ( function_exists( 'mb_encode_numericentity' ) ) {
			return mb_encode_numericentity( $html, [ 0x80, 0x10FFFF, 0, 0xFFFFFF ], 'UTF-8' );
		}

		// Without mbstring: the /u flag makes PCRE hand over one complete UTF-8 character per match.
		$encoded = preg_replace_callback(
			'/[^\x00-\x7F]/u',
			static function ( array $match ): string {
				return '&#' . self::utf8_code_point( $match[0] ) . ';';
			},
			$html
		);

		// preg_replace_callback() returns null on invalid UTF-8; parse the input as-is then.
		return is_string( $encoded ) ? $encoded : $html;
	}

	/**
	 * Compute the Unicode code point of a single, valid UTF-8 encoded character.
	 *
	 * @since {VERSION}
	 *
	 * @param string $char One UTF-8 character (1-4 bytes).
	 *
	 * @return int
	 */
	private static function utf8_code_point( string $char ): int {

		$bytes = array_values( unpack( 'C*', $char ) );

		switch ( count( $bytes ) ) {
			case 1:
				return $bytes[0];

			case 2:
				return ( ( $bytes[0] & 0x1F ) << 6 ) | ( $bytes[1] & 0x3F );

			case 3:
				return ( ( $bytes[0] & 0x0F ) << 12 ) | ( ( $bytes[1] & 0x3F ) << 6 ) | ( $bytes[2] & 0x3F );

			default:
				return ( ( $bytes[0] & 0x07 ) << 18 ) | ( ( $bytes[1] & 0x3F ) << 12 ) | ( ( $bytes[2] & 0x3F ) << 6 ) | ( $bytes[3] & 0x3F );
		}
	}

	/**
	 * Decode HTML entities to UTF-8 while keeping markup-significant characters encoded.
	 *
	 * @since {VERSION}
	 *
	 * @param string $html    Markup containing entities.
	 * @param bool   $for_xml True to emit only entities an XML parser knows without a DTD.
	 *
	 * @return string
	 */
	private static function decode_entities( string $html, bool $for_xml ): string {

		$decoded = preg_replace_callback(
			'/&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);/',
			static function ( array $match ) use ( $for_xml ): string {
				$entity = $match[0];
				$chars  = html_entity_decode( $entity, ENT_QUOTES | ENT_HTML5, 'UTF-8' );

				if ( $chars === $entity ) {
					// Not decodable. An unknown *named* entity is undefined in XML, so keep it as literal text there.
					return ( $for_xml && $entity[1] !== '#' ) ? '&amp;' . substr( $entity, 1 ) : $entity;
				}

				if ( strpbrk( $chars, '&<>"\'' ) === false ) {
					return $chars;
				}

				// Decoding these would change the structure of the markup.
				return $for_xml ? htmlspecialchars( $chars, ENT_QUOTES | ENT_XML1, 'UTF-8' ) : $entity;
			},
			$html
		);

		return is_string( $decoded ) ? $decoded : $html;
	}

	/**
	 * Prepare an HTML snippet for DOMDocumentFragment::appendXML().
	 *
	 * @since {VERSION}
	 *
	 * @param string $html HTML snippet.
	 *
	 * @return string
	 */
	private static function normalize_for_xml( string $html ): string {

		// XML doesn't know HTML named entities, so they are turned into characters first.
		$html = self::decode_entities( $html, true );

		// Normalize BR tags to self-closing for XML parser.
		$normalized = preg_replace( '#<br\s*/?>#i', '<br />', $html );

		return is_string( $normalized ) ? $normalized : $html;
	}

	/**
	 * Append XML to a fragment with libxml errors suppressed.
	 *
	 * @since {VERSION}
	 *
	 * @param \DOMDocumentFragment $fragment Fragment to append to.
	 * @param string               $xml      Markup to parse.
	 *
	 * @return bool Whether the markup was parsed and appended.
	 */
	private static function append_xml( \DOMDocumentFragment $fragment, string $xml ): bool {

		$prev_use_errors = libxml_use_internal_errors( true );

		libxml_clear_errors();

		try {
			return (bool) $fragment->appendXML( $xml );
		} finally {
			libxml_clear_errors();
			libxml_use_internal_errors( $prev_use_errors );
		}
	}
}