<?php
/*		Freewheeling Easy Mapping Application
 *		A collection of routines for display of trail maps and amenities
 *		copyright Roy R Weil 2019 - https://royweil.com
 */
class freewheeling_Extract_Data_From_HtmlFile
{
    /**
     * Extract the trail information from the prebuilt "<trailId>.html" file and
     * store it in the html table (the first section is also written to the
     * Description column of the trails table).
     *
     * The file is consumed front to back:
     *   - everything before the <body tag is dropped
     *   - the class="Heading-3" and class="Heading-4" paragraphs are located
     *   - class="Text" down to the first <p class="DB-Trail" is the first section
     *   - the DB-Trail data block is distributed into individual columns
     *     (at present 7/2024 only the first data block is extracted)
     *   - class="Text" down to the first Heading-5 is the description after the data block
     *   - each Heading-5 group is saved in the column selected by its heading text;
     *     "Version" is assumed to be the last group
     *
     * The error count is updated from the unmodified file contents before
     * anything is removed from the working buffer.
     *
     * @param string $trailId trail identifier; also the base name of the html file
     * @return string progress and error messages accumulated while extracting
     * @throws Exception when a database check or update fails, or a helper throws;
     *                   the message is prefixed with the progress gathered so far
     */
    static public function Extract($trailId)
    {
        global $eol, $errorBeg, $errorEnd;
        global $wpdbExtra;
        global $freewheeling_prebuilt_dire_trails_html;
        $msg = "";
        $debugDatabase = false;
        $debugFoundContent = false;
        $debugBufferProgress = false;
        $msg .= freeWheeling_edit_setGlobals::setGlobals("Extract");
        ini_set("display_errors", true);
        $msg .= "---------------------------------  I#2140 Extracting the Website data "; // more to come;
        // columns of the html table collected along the way and written in one update at the end
        $htmTableUpdate = array();
        // output parameters of the helpers; preset so a failed search leaves "" instead of null
        $titleText = "";
        $location = "";
        $firstSection = "";
        $dataBlock = "";
        $htmDescriptionAfterDataBlock = "";
        try {
            $filename = "$trailId.html";
            $fileNameFull = "$freewheeling_prebuilt_dire_trails_html/$filename";
            $msg .= "from file $fileNameFull" . $eol;
            if (!file_exists($fileNameFull)) {
                $msg .= "$errorBeg E#2141 File $fileNameFull does not exist" . $errorEnd;
                return $msg;
            }
            $msg .= "#2142 File $fileNameFull exists" . $eol;
            // prepare() quotes and escapes the id instead of pasting it into the SQL text
            $trailCnt = $wpdbExtra->get_var($wpdbExtra->prepare(
                "select count(*) from $wpdbExtra->trails where trId = %s",
                $trailId
            ));
            if (1 != $trailCnt) {
                throw new Exception("$errorBeg E#2143 Trail $trailId does not exist in the trails table $errorEnd");
            }
            $htmCnt = $wpdbExtra->get_var($wpdbExtra->prepare(
                "select count(*) from $wpdbExtra->html where htmTrailId = %s",
                $trailId
            ));
            if (0 == $htmCnt) {
                $msg .= "I#2144 First time create an entry" . $eol;
                $htmCnt = $wpdbExtra->insert($wpdbExtra->html, array("htmTrailId" => $trailId));
            }
            if (1 != $htmCnt) {
                throw new Exception("$errorBeg E#2145 Trail $trailId does not exist in the html table $errorEnd");
            }
            if ($debugDatabase) $msg .= "Trail ID $trailId - $wpdbExtra->trails $trailCnt, html $htmCnt" . $eol;
            // assert that the row exists in the $wpdbExtra->html table, and the $wpdbExtra->trails table
            $bufferIn = file_get_contents($fileNameFull);
            if (false === $bufferIn) {
                $msg .= "$errorBeg File $fileNameFull could not be read" . $errorEnd;
                return $msg;
            }
            $buffer = $bufferIn;
            $msg .= "File $fileNameFull has length " . strlen($buffer) . $eol;
            $msg .= self::remove_A_id_equal($buffer);
            $buffer = self::removeBlankHref($buffer);
            // image references lose their directory prefix, e.g.
            //src="oilcity-web-resources/image/oilcity-134536.png
            $imageDirPrefix = 'src="' . $trailId . '-web-resources/image/';
            $msg .= "I#2146 searchString is $imageDirPrefix" . $eol;
            $buffer = str_replace($imageDirPrefix, 'src="', $buffer);
            $msg .= self::UpdateErrorCount($bufferIn, $trailId);  // update the error count before any gets deleted
            if ($debugBufferProgress) $msg .= self::DisplayFound("after remove a id buffer ", $buffer);
            // this code deletes the <head>...</head> which is not used
            $ii = strpos($buffer, "<body");
            if (false === $ii) {
                $msg .= "$errorBeg E#2147 buffer does not have a body tag" . $errorEnd;
                return $msg;
            }
            // cut exactly at the body tag; a start of $ii - 5 turns negative (counts
            // from the end of the string) when the tag is within the first 5 bytes
            $buffer = substr($buffer, $ii);
            // this code could pick up the location at the top of the file.
            // should move buffer to start of the first paragraph
            $msg .= self::ClassParagraph($buffer, "Heading-3", $titleText, "</p"); // points to class="Heading-3"
            if ($debugFoundContent) $msg .= self::DisplayFound("Description-Heading-3", $titleText);
            if ($debugBufferProgress) $msg .= self::DisplayFound("the Title line ", $buffer);
            $msg .= self::ClassParagraph($buffer, "Heading-4", $location, "</p", false); // leave class=text
            if ($debugFoundContent) $msg .= self::DisplayFound("location-Heading-4", $location);
            if ($debugBufferProgress) $msg .= self::DisplayFound("the location line ", $buffer);
            $msg .= self::ClassParagraph($buffer, "Text", $firstSection, '<p class="DB-Trail"');
            if ($debugFoundContent) $msg .= self::DisplayFound("the first paragraph", $firstSection);
            if ($debugBufferProgress) $msg .= self::DisplayFound("buffer before search for dataBlock", $buffer);
            $htmTableUpdate["htmFirstSection"] = $firstSection;
            // if no description in the trails database, use the one from the HTML
            $sqlDescription = $wpdbExtra->prepare(
                "select description from $wpdbExtra->trails where trId = %s",
                $trailId
            );
            $msg .= "I#2148 sqlDescription is $sqlDescription" . $eol;
            $description = $wpdbExtra->get_var($sqlDescription);
            if (1 != $wpdbExtra->num_rows) {
                $msgErr = "$errorBeg E#2149 failed to get the description from the trails table $errorEnd $eol";
                $msgErr .= $wpdbExtra->print_error();
                throw new Exception($msgErr);
            }
            // NOTE(review): the comment above says "only when the database has no
            // description", but this condition also overwrites an existing description
            // whenever the HTML supplies a first section - confirm which is intended.
            if (empty($description) || !empty($firstSection)) {
                $update = array("Description" => $firstSection);
                $where = array("trId" => $trailId);
                $result = $wpdbExtra->update($wpdbExtra->trails, $update, $where);
                $numUpdated = $wpdbExtra->num_rows;
                if (false === $result) {
                    $msg .= "$errorBeg E#2150 failed to  update the trails table with a description numUpdated = $numUpdated  $errorEnd" .
                        "current description = '" . htmlspecialchars($firstSection) . "'" .
                        rrwUtil::print_r($update, true, "failed to  update the trails table with a description");
                    $msg .= $wpdbExtra->print_error();
                    return $msg;
                }
            }
            // stuff before data block has been handled, now get the data block
            $msg .= self::ClassParagraph($buffer, "DB-Trail", $dataBlock, 'class="Text"');
            // a stray ';' after this condition used to make the dump unconditional
            if ($debugFoundContent) $msg .= self::DisplayFound("dataBlock", $dataBlock);
            $msg .= self::DistributeTheDataBlock($dataBlock, $trailId, $debugDatabase);
            $msg .= self::DisplayFound("buffer after data block", substr($buffer, 0, 500));
            // extract the stuff after the data block as more description
            $msg .= self::ClassParagraph($buffer, "Text", $htmDescriptionAfterDataBlock, "Heading-5");
            $msg .= self::DisplayFound("trailDescription", $htmDescriptionAfterDataBlock);
            $htmTableUpdate["htmDescriptionAfterDataBlock"] = $htmDescriptionAfterDataBlock;
            $msg .= "I#2151 description length " . strlen((string) $description) . " is $description" . $eol;
            // now gather up all the heading-5 sections
            $cntWhile = 0;
            try {
                while (true) {
                    // stop cleanly when no Heading-5 is left; relying only on the value
                    // handed back in $head5 ended a file without a "Version" group in
                    // the "Unknown type" exception below and skipped the table update
                    $ignoredMsg = "";
                    if (false === self::FindStartParagraphWithClass($buffer, "Heading-5", $ignoredMsg)) {
                        break; // no more groups
                    }
                    $cntWhile++;
                    if ($cntWhile > 10) {
                        $msg .= "$errorBeg E#2152 Too many heading-5 sections loops" . $errorEnd;
                        return $msg;
                    }
                    $head5 = false;
                    $text = "";
                    $msg .= self::FindHeadingGroup5($buffer, $head5, $text);
                    if ($head5 === false) {
                        break; // no more groups
                    }
                    $head5 = trim($head5);
                    $text = str_replace($imageDirPrefix, 'src="', (string) $text);
                    switch ($head5) {
                        case "Development plans":
                        case "Development Plans":
                            $htmTableUpdate["htmDevelopment"] = $text;
                            break;
                        case "Extensions of the ride":
                            $htmTableUpdate["htmExtension"] = $text;
                            break;
                        case "Access points":
                            $htmTableUpdate["htmAccessPoints"] = $text;
                            break;
                        case "Amenities":
                            $htmTableUpdate["htmAmenitiesList"] = $text;
                            break;
                        case "Local history, attractions":
                            $htmTableUpdate["htmLocalHistory"] = $text;
                            break;
                        case "Trail organization":
                        case "Trail organizations":
                            $msg .= self::extractOrganizationInformation($text, $trailId);
                            $htmTableUpdate["htmOrganizationAddress"] = $text;
                            break;
                        case "Maps, guides, other references":
                            $htmTableUpdate["htmResources"] = $text;
                            break;
                        case "Version":
                            $msg .= self::extractVersionInformation($text, $trailId);
                            $htmTableUpdate["VersionText"] = $text;
                            break;
                        default:
                            $msgErr = "$errorBeg E#2154 Unknown type '$head5'" . $errorEnd;
                            $msgErr .= self::DisplayFound($head5, $text) . " $eol
                                expected types: Development plans, Extensions of the ride,
                                 Access points, Amenities, Local history, attractions,
                                Trail organization, Maps, guides, other references, Version";
                            throw new Exception($msgErr);
                    } //
                    if ($debugFoundContent) $msg .= self::DisplayFound($head5, $text);
                    if ("Version" == $head5)
                        break;  // assume that the version is the last heading 5
                } // end while
            } catch (Exception $e) {
                $msgErr = $e->getMessage();
                return "$msg $errorBeg E#2155 Exception in heading-5 loop $msgErr $errorEnd";
            }
            $msg .= "I#2156 updating the html data $eol";
            //    $msg .= FreewheelFormat::EditHTMLink($trailId, "$trailId html information");
            $whereClause = array("htmTrailId" => $trailId);
            // same connection as every other statement here (and as the error report below)
            $cnt = $wpdbExtra->update($wpdbExtra->html, $htmTableUpdate, $whereClause);
            if (false === $cnt) {
                $msgErr = "$errorBeg E#2157 failed to  update $wpdbExtra->html problem item too big . $errorEnd";
                $msgErr .= $wpdbExtra->print_error();
                foreach ($htmTableUpdate as $key => $value) {
                    $msgErr .= "item '$key' length " . strlen((string) $value) . $eol;
                }
                $msgErr .= rrwUtil::print_r($htmTableUpdate, true, "update the $wpdbExtra->html");
                throw new Exception($msgErr);
            }
        } catch (Exception $e) {
            $msgErr = $e->getMessage();
            // keep the original exception attached for anyone inspecting the chain
            throw new Exception("$msg E#2158 at bottom of extract with $msgErr", 0, $e);
        }
        return $msg;
    } // end function Extract
    /**
     * Take the next Heading-5 group off the front of the buffer.
     *
     * The heading name is the text between the ">" that closes the Heading-5 tag
     * and the next "<". The group text runs from there to the next Heading-5, or,
     * when there is none, to the next "</div>", or else to the end of the buffer.
     *
     * @param string       $buffer      HTML still to be analysed; everything up to the
     *                                  end of the returned group is removed from it
     * @param string|false $headingName receives the trimmed heading text, or false
     *                                  when the buffer holds no further Heading-5
     * @param string       $text        receives the text of the group ("" when none)
     * @return string progress / debug messages
     */
    private static function FindHeadingGroup5(&$buffer, &$headingName, &$text)
    {
        global $eol, $errorBeg, $errorEnd;
        $msg = "";
        $debugGroup5 = false;
        $debugFindHeadingGroup5 = false;
        $iiStart = self::FindStartParagraphWithClass($buffer, "Heading-5", $msg);
        if ($debugFindHeadingGroup5) $msg .= "Finding first Heading-5 at '" . $iiStart . "'$eol";
        if ($iiStart === false) {
            if ($debugGroup5)
                $msg .= "$errorBeg E#2159 buffer does not have a paragraph with class Heading-5. at end?? $errorEnd buffer start is:
                " . str_repeat("-", 80) . " Buffer start $eol" . htmlspecialchars($buffer, ENT_QUOTES) . str_repeat("-", 80) . " Buffer end $eol";
            // false (not "") is the "no more groups" signal the caller tests for;
            // clear $text so the previous group's text is not seen again
            $headingName = false;
            $text = "";
            return $msg;
        }
        // found a heading-5
        $iiCarrot = strpos($buffer, ">", $iiStart); //find the rest of class
        $iiCloseCarrot = (false === $iiCarrot) ? false : strpos($buffer, "<", $iiCarrot + 1); //find the end of the heading 5
        if (false === $iiCarrot || false === $iiCloseCarrot) {
            // an unterminated tag would otherwise yield offsets computed from false
            $msg .= "$errorBeg buffer has an unterminated Heading-5 tag $errorEnd";
            $headingName = false;
            $text = "";
            return $msg;
        }
        $headingName = trim(substr($buffer, $iiCarrot + 1, $iiCloseCarrot - $iiCarrot - 1));
        if ($debugFindHeadingGroup5) $msg .= self::DisplayFound("first part, aka headingName", $headingName);
        // there is  </p> after the heading 5  and before the class=text of the group
        $buffer = substr($buffer, $iiCloseCarrot); // remove the heading 5
        $buffer = self::removeBeginP($buffer);
        if ($debugFindHeadingGroup5)  $msg .= self::DisplayFound("remain buffer ", $buffer);
        // we now have the headingName find the next heading-5
        $iiEnd = self::FindStartParagraphWithClass($buffer, "Heading-5", $msg); // find the next one
        if ($debugFindHeadingGroup5) $msg .= "Finding ending Heading-5 at '" . $iiEnd . "'$eol";
        if ($iiEnd === false) {
            // no more heading-5, use the next </div> or the end of the buffer
            $iiEnd = strpos($buffer, "</div>"); // last one ends in </div>
            if ($iiEnd === false) {
                // $msg .= self::DisplayFound("E#2160 buffer does not have a paragraph with class Heading-5", $buffer);
                $iiEnd = strlen($buffer); // no more text, use the end of the buffer
            }
        }
        if ($debugFindHeadingGroup5) $msg .= "extracting text from 0 to  '" . $iiEnd . "'$eol";
        if ($debugFindHeadingGroup5) $msg .= self::DisplayFound("buffer to end ", substr($buffer, 0, $iiEnd));
        $text = substr($buffer, 0, $iiEnd);
        if ($debugFindHeadingGroup5) $msg .= "before " . htmlspecialchars($text, ENT_QUOTES) . $eol;
        $text = self::removeBeginP($text);
        if ($debugFindHeadingGroup5) $msg .= "after  " . htmlspecialchars($text, ENT_QUOTES) . $eol;
        $buffer = substr($buffer, $iiEnd);
        if ($debugFindHeadingGroup5) {
            $msg .= self::DisplayFound("next buffer to analyze", substr($buffer, 0, 200) . "...........");
        }
        $msg .= self::DisplayFound("extracted Information <strong>$headingName</strong> with ", $text);
        return $msg;
    }
    /**
     * Tidy the front of a buffer that was cut inside a paragraph tag.
     *
     * A leading "</p>" is dropped; if what then remains starts with "class="
     * the "<p " that was cut away is put back in front of it.
     *
     * @param string $buffer text to tidy
     * @return string the tidied text
     */
    private static function removeBeginP($buffer)
    {
        // skip the </p> when the buffer opens with one
        $rest = (substr($buffer, 0, 4) === "</p>") ? substr($buffer, 4) : $buffer;
        // put back the <p in front of a bare class= attribute
        return (substr($rest, 0, 6) === "class=") ? "<p " . $rest : $rest;
    }
    /**
     * Format a block of found text for the progress log.
     *
     * The text is written twice between two marker lines: once as is and once
     * passed through htmlspecialchars(). Missing text gives a single
     * "... is empty" line.
     *
     * @param string $msg2display  label naming what was found
     * @param mixed  $text2display the found text
     * @return string the formatted log fragment
     */
    private static function DisplayFound($msg2display, $text2display)
    {
        global $eol;
        // empty() alone would also report the legitimate text "0" as missing
        if (empty($text2display) && !is_numeric($text2display)) {
            return "---------------------------------  $msg2display is empty  ----------------------------------------------$eol";
        }
        // the whole text is shown; callers that want less pass a substr() themselves
        $text = (string) $text2display;
        $msg2display .= " found content length " . strlen($text);
        return str_repeat("V", 80) . " $msg2display $eol $text" . $eol . str_repeat("-", 30) .
            $eol . htmlspecialchars($text, ENT_QUOTES) . $eol . str_repeat("^", 80) . " $msg2display $eol $eol";
    }
    /**
     * Finds the start of a paragraph with the given class in the buffer.
     *
     * @param string $buffer The HTML buffer to search.
     * @param string $class The class name to search for in the paragraph.
     * @param string $msgOutput Message output for debugging (output parameter).
     * @return int|false The index of the "class=" not the beginning of the tag, or false if not found.
     */
    private static function FindStartParagraphWithClass($buffer, $class, &$msgOutput)
    {
        // Look for the attribute text class="<class> ; the closing quote is not part
        // of the search, so any class value that starts with $class is accepted.
        // $msgOutput is part of the signature but nothing is written to it.
        $attribute = 'class="' . $class;
        // strpos() gives the index of "class=" (not of the tag start), or false when absent
        return strpos($buffer, $attribute);
    }
    /**
     * Finds the paragraph with the given class, maybe removes it and all before from the buffer, and returns a message about the operation.
     *
     * @param string $buffer The HTML buffer to search and modify.
     * @param string $class The class name to search for in the paragraph.
     * @param string $paragraphText The extracted paragraph text (output parameter).
     * @param string $endString The string marking the end of the paragraph.
     * @param bool $removeIt Whether to remove the found paragraph from the buffer (default: true).
     * @return string Message describing the result of the operation.
     */
    private static function ClassParagraph(&$buffer, $class, &$paragraphText, $endString, bool $removeIt = true)
    {
        global $eol, $errorBeg, $errorEnd;
        $msg = "";
        // NOTE(review): tracing is hard-wired on here while the other debug flags in
        // this class are off - confirm that the verbose output is still wanted.
        $debugClassParagraph = true;
        // an output variable the caller never assigned arrives as null; hand back ""
        // instead, so a failed search does not leak null into later string handling
        if (null === $paragraphText) {
            $paragraphText = "";
        }
        if ($debugClassParagraph) {
            // a bare bool prints as "1" or as nothing at all, so spell it out
            $removeItText = $removeIt ? "true" : "false";
            $msg .= "I#2162 ClassParagraph('$class', '" . htmlspecialchars($endString) . "', $removeItText) $eol";
        }
        if (empty($class)) {
            // no class given: search for the end string from the start of the buffer
            $iiClassEqual = 0;
        } else {
            $iiClassEqual = self::FindStartParagraphWithClass($buffer, $class, $msg);
            if ($iiClassEqual === false) {
                $msg .= "$errorBeg E#2163 buffer does not have a paragraph with class $class" . $errorEnd;
                if (is_string($buffer) && "" !== $buffer) {
                    $msg .= htmlspecialchars(substr($buffer, 0, 200)) . $eol;
                }
                return $msg;
            }
        } // iiClassEqual is the start of what we want, now lets find the end
        $iiEnd = strpos($buffer, $endString, $iiClassEqual); //find the end of the paragraph
        if ($iiEnd === false) {
            $contextSnippet = htmlspecialchars(substr($buffer, 0, 300));
            $msg .= "$errorBeg E#2164 buffer does not have an ending '$endString' after index $iiClassEqual with class '$class'.$errorEnd $eol";
            $msg .= "Buffer context: $contextSnippet $eol";
            return $msg;
        }
        // iiEnd is the end of what we want
        $iiTextStart = strpos($buffer, ">", $iiClassEqual); //find the start of the text, i.e. end of class attributes
        if (false === $iiTextStart || $iiEnd <= $iiTextStart) {
            // the tag is never closed, or the end string sits inside the tag itself
            $contextSnippet = htmlspecialchars(substr($buffer, max(0, (int) $iiTextStart - 50), 200));
            $msg .= "$errorBeg E#2165 searched class attribute does not have closing >.$errorEnd $eol";
            $msg .= "Buffer context: $contextSnippet $eol";
            return $msg;
        }
        $paragraphText = trim(substr($buffer, $iiTextStart + 1, $iiEnd - $iiTextStart - 1));
        if ($debugClassParagraph) $msg .= self::DisplayFound(" the found text", $paragraphText);
        if ($removeIt) {
            if ($debugClassParagraph) $msg .= self::DisplayFound("before remove", substr($buffer, 0, 400));
            // the buffer continues at the end string; everything before it is dropped
            $buffer = substr($buffer, $iiEnd);
            if ($debugClassParagraph) $msg .= self::DisplayFound("after remove", text2display: substr($buffer, 0, 400));
        }
        $msg .= "ClassParagraph found class of '$class' with length " . strlen($paragraphText) . " characters $eol";
        return $msg;
    } // end function ClassParagraph
    /**
     * Return the text between the tag carrying the given class and the next "</div>".
     *
     * @param string $buffer      HTML to search (not modified)
     * @param string $class       class name to look for
     * @param string $description receives the text after the ">" that closes the tag,
     *                            up to the "</div>"; left untouched on failure
     * @return string error messages, "" on success
     */
    private static function DivParagraph($buffer, $class, &$description)
    {
        global $eol, $errorBeg, $errorEnd;
        $msg = "";
        $iiStart = self::FindStartParagraphWithClass($buffer, $class, $msg);
        if ($iiStart === false) {
            $msg .= "$errorBeg E#2166 buffer does not have a paragraph with class $class" . $errorEnd;
            return $msg;
        }
        $iiEnd = strpos($buffer, "</div>", $iiStart); //find the end of the data block
        if ($iiEnd === false) {
            $msg .= "$errorBeg buffer does not have an ending &lt;/div&gt; with class $class" . $errorEnd;
            return $msg;
        }
        $iiCarrot = strpos($buffer, ">", $iiStart); //find the end of the tag
        if (false === $iiCarrot || $iiCarrot > $iiEnd) {
            // the first ">" belongs to the </div> itself: the tag is not closed before
            // it, and the length below would be negative
            $msg .= "$errorBeg buffer does not close the tag with class $class before the ending &lt;/div&gt;" . $errorEnd;
            return $msg;
        }
        $iiTextStart = $iiCarrot + 1; // the text starts right after the ">"
        $description = substr($buffer, $iiTextStart, $iiEnd - $iiTextStart);
        return $msg;
    } // end function DivParagraph
    /**
     * Return the text between the first Heading-5 tag and the next "</div>".
     *
     * @param string $buffer      HTML to search (taken by reference, not modified)
     * @param string $description receives the text after the ">" that closes the
     *                            Heading-5 tag, up to the "</div>"; left untouched on failure
     * @return string error messages, "" on success
     */
    private static function toHeading5(&$buffer, &$description)
    {
        global $eol, $errorBeg, $errorEnd;
        $msg = "";
        // Every other search in this class uses "Heading-5"; the former "Heading5"
        // could not match those paragraphs.
        // NOTE(review): confirm no input really carries a class named "Heading5".
        $class = "Heading-5";
        $iiStart = self::FindStartParagraphWithClass($buffer, $class, $msg);
        if ($iiStart === false) {
            $msg .= "$errorBeg E#2167 buffer does not have a paragraph with class $class" . $errorEnd;
            return $msg;
        }
        $iiEnd = strpos($buffer, "</div>", $iiStart); //find the end of the data block
        if ($iiEnd === false) {
            $msg .= "$errorBeg buffer does not have an ending &lt;/div&gt; with class $class" . $errorEnd;
            return $msg;
        }
        $iiCarrot = strpos($buffer, ">", $iiStart); //find the end of the tag
        if (false === $iiCarrot || $iiCarrot > $iiEnd) {
            // the first ">" belongs to the </div> itself: the tag is not closed before
            // it, and the length below would be negative
            $msg .= "$errorBeg buffer does not close the tag with class $class before the ending &lt;/div&gt;" . $errorEnd;
            return $msg;
        }
        $iiTextStart = $iiCarrot + 1; // the text starts right after the ">"
        $description = substr($buffer, $iiTextStart, $iiEnd - $iiTextStart);
        return $msg;
    } // end function toHeading5
    /*
    private static function FindHead3_4Paragraph(&$buffer, $class, &$description)
    {
        //find the paragraph with the given class, remove it from the buffer and return it
        global $eol, $errorBeg, $errorEnd;
        $msg = "";
        $debugFindHead3_4Paragraph = false;
        $iiStart = self::FindStartParagraphWithClass($buffer, $class, $msg);
        if ($iiStart === false) {
            $msg .= "$errorBeg E#2176 buffer does not have a paragraph with class $class" . $errorEnd;
            return $msg;
        }
        $iiEnd = strpos($buffer, "</p>", $iiStart); //find the end of the paragraph
        if ($iiEnd === false) {
            $msg .= "$errorBeg buffer does not have an ending &lt;/p&gt; with class $class" . $errorEnd;
            return $msg;
        }
        $iiCarrot = strpos($buffer, ">", $iiStart) + 1; //find the start of the text
        $description = substr($buffer, $iiCarrot, $iiEnd - $iiCarrot);
        if ($debugFindHead3_4Paragraph)  $msg .= self::DisplayFound("before remove", $buffer);
        if ($iiStart > 0)
            $buffer = substr($buffer, 0, $iiStart - 1) . substr($buffer, $iiEnd + 4);
        else
            $buffer = substr($buffer, $iiEnd + 4);
        if ($debugFindHead3_4Paragraph) $msg .= self::DisplayFound("after remove", $buffer);
        return $msg;
    } // end function FindHead3_4Paragraph
*/
    /**
     * Unwrap links that have an empty target: <a href="">text</a> becomes text.
     *
     * @param string $buffer HTML to clean
     * @return string the HTML with the blank links unwrapped; the input unchanged
     *                when the regular expression engine reports an error
     */
    private static function removeBlankHref($buffer)
    {
        // the s modifier lets the link text span several lines; the lazy match
        // stops at the nearest closing tag
        $bufferOut = preg_replace('/<a href="">(.*?)<\/a>/s', '$1', $buffer);
        // preg_replace() gives null on failure; never hand that on as the new buffer
        return (null === $bufferOut) ? $buffer : $bufferOut;
    }
    /**
     * Remove the <a id="..."> ... a> anchors from the buffer.
     *
     * An anchor is everything from <a id=" up to and including the next a> on
     * the same line. At most 50 anchors are removed; finding more is reported
     * as an error and the remaining anchors stay in the buffer.
     *
     * @param string $buffer HTML to clean, modified in place
     * @return string the number of anchors removed, or the error message
     */
    private static function remove_A_id_equal(&$buffer)
    {
        global $eol, $errorBeg, $errorEnd;
        $msg = "";
        $debugAidEqual = false;
        $maxAnchors = 50;
        $removedCnt = 0; // counts anchors actually removed, not loop passes
        while (true) {
            // no leading (.+?) here: it kept an anchor at the very start of a line
            // from ever matching
            $found = preg_match('/<a id=".+?a>/', $buffer, $matches, PREG_OFFSET_CAPTURE);
            if (1 !== $found) {
                break; // 0 = no anchor left, false = regular expression error
            }
            if ($removedCnt >= $maxAnchors) {
                $msg .= "$errorBeg E#2169 File has too many &lt;A id= tags" . $errorEnd;
                return $msg;
            }
            if ($debugAidEqual) {
                foreach ($matches as $key => $value) {
                    $msg .= "$key => index " . $value[1] . " -  " . htmlspecialchars($value[0], ENT_QUOTES) . $eol;
                }
            }
            $anchorStart = $matches[0][1];
            $anchorEnd = $anchorStart + strlen($matches[0][0]);
            $frontHalf = substr($buffer, 0, $anchorStart);
            $backHalf = substr($buffer, $anchorEnd);
            $buffer = $frontHalf . $backHalf;
            $removedCnt++;
            if ($debugAidEqual) {
                $msg .= self::DisplayFound("front half", $frontHalf);
                $msg .= self::DisplayFound("back half", substr($backHalf, 0, 500));
                $msg .= self::DisplayFound("start of buffer", substr($buffer, 0, 500));
            }
        }
        $msg .= "Removed $removedCnt &lt;a id= tags" . $eol;
        return $msg;
    } //
    /**
     * Split the DB-Trail data block into its lines and store them in the html table.
     *
     * The paved mileage is taken from the "Length, Surface" line: it is the last
     * number written before the word "paved", or 0 when that word does not occur.
     *
     * @param string $block         text of the data block
     * @param string $trailId       trail whose html row is updated
     * @param bool   $debugDatabase add database progress to the messages
     * @return string progress and error messages
     * @throws Exception from GetLine() when an expected line is missing from the block
     */
    private static function DistributeTheDataBlock($block, $trailId, $debugDatabase = false)
    {
        global $eol, $errorBeg, $errorEnd;
        global $wpdbExtra;
        $msg = "";
        if ($debugDatabase) $msg .= "into distribute the Data block" . $eol;
        $location = self::GetLine($block, ">Location");
        $trailHeads = self::GetLine($block, ">Trailheads");
        $surfaceDescription = self::GetLine($block, ">Length, Surface");
        $paved = 0;
        // \b keeps "unpaved" from being read as "paved"
        if (1 === preg_match('/\bpaved\b/', $surfaceDescription, $pavedAt, PREG_OFFSET_CAPTURE)) {
            $temp = substr($surfaceDescription, 0, $pavedAt[0][1]); // all text before "paved"
            if (preg_match_all('/\d+(?:\.\d+)?/', $temp, $numbers) > 0) {
                // The number next to "paved" is the last one before it. Summing a
                // PREG_OFFSET_CAPTURE pair added the byte offset to the mileage.
                $paved = (float) end($numbers[0]);
            } else {
                // keep going with 0 so the other lines of the block are still saved
                $msg .= "$errorBeg E#2170 failed to find the number of paved miles in '$temp' $errorEnd";
            }
        }
        $characterDescription = self::GetLine($block, ">Character");
        $Usage = self::GetLine($block, ">Usage restrictions");
        $Amenities = self::GetLine($block, ">Amenities");
        $DriveTime = self::GetLine($block, ">Driving time from Pittsburgh");
        $updateHtm = array(
            "Location" => $location,
            "TrailHeadCollection" => $trailHeads,
            "Usage" => $Usage,
            "AmenitiesCollection" => $Amenities,
            "DriveTime" => $DriveTime,
            "surfaceDescription" => $surfaceDescription,
            "pavedMiles" => $paved,
            "characterDescription" => $characterDescription,
        );
        $msg .= rrwUtil::print_r($updateHtm, true, "update Htm");
        $htmCnt = $wpdbExtra->update($wpdbExtra->html, $updateHtm, array("htmTrailId" => $trailId));
        if (false === $htmCnt) {
            $msg .= "$errorBeg E#2171 failed to  update the html table $errorEnd";
            $msg .= $wpdbExtra->print_error();
            $msg .= rrwUtil::print_r($updateHtm, true, "update $wpdbExtra->html");
            //   $msg .= $wpdbExtra->last_query;
            return $msg;
        }
        if ($debugDatabase) $msg .= "updated $htmCnt rows in $wpdbExtra->html" . $eol;
        return $msg;
    }
    /**
     * Get the value of one line of the data block.
     *
     * The value is the text between the first tab marker (&#9;) that follows
     * the item name and the next "<", with surrounding white space trimmed.
     *
     * @param string $block text of the data block
     * @param string $item  item name to look for, e.g. ">Location"
     * @return string the value of the item
     * @throws Exception when the item, its tab marker or the closing "<" is missing
     */
    private static function GetLine($block, $item)
    {
        global $eol, $errorBeg, $errorEnd;
        $tabMarker = "&#9;"; // the tab between item name and value, as written in the HTML
        $ii = strpos($block, "$item");
        if ($ii === false)
            throw new Exception("$errorBeg E#2172 Block data did not contain
                                " . htmlspecialchars($item, ENT_QUOTES) . $eol . htmlspecialchars($block, ENT_QUOTES) . $errorEnd);
        $ii = $ii + strlen($item);
        $ii = strpos($block, $tabMarker, $ii); // find the tab character
        if ($ii === false)
            throw new Exception("$errorBeg E#2173 Block data did not contain termination of
                            " . htmlspecialchars($item, ENT_QUOTES) . $eol . htmlspecialchars($tabMarker) . $errorEnd);
        $jj = strpos($block, "<", $ii);
        if ($jj === false)
            throw new Exception("$errorBeg E#2174 Block data did not contain termination of
                                " . htmlspecialchars($item, ENT_QUOTES) . $eol . htmlspecialchars("<") . $errorEnd);
        $valueStart = $ii + strlen($tabMarker);
        $line = substr($block, $valueStart, $jj - $valueStart);   // between the tab and the </p>
        return trim($line);
    } // end function GetLine
    /**
     * Find the organization's web address in the "Trail organization" text and
     * store it as bookURL in the html table.
     *
     * The address is the text between the last ">" and the first "<" around the
     * first ".org", ".gov" or ".com" found (tried in that order); "https://" is
     * put in front when it does not start with "http".
     *
     * @param string $OrgText text of the organization group
     * @param string $trailId trail whose html row is updated
     * @return string progress and error messages
     */
    private static function extractOrganizationInformation($OrgText, $trailId)
    {
        global $eol, $errorBeg, $errorEnd;
        global $wpdbExtra;
        $msg = "";
        $debugOrg = rrwParam::isDebugMode(("debugOrg"));
        $iiDomain = false;
        foreach (array(".org", ".gov", ".com") as $domain) {
            $iiDomain = strpos($OrgText, $domain);
            if (false !== $iiDomain) {
                break;
            }
        }
        if (false === $iiDomain) {
            $msg .= "$errorBeg E#2177 Organization text does not have a .org, .gov or .com $errorEnd";
            return $msg;
        }
        if ($debugOrg) $msg .= "found domain at index $iiDomain text  is '" . htmlspecialchars(substr($OrgText, $iiDomain, 7)) . "' $eol";
        $iiEnd = strpos($OrgText, "<", $iiDomain);
        if (false === $iiEnd) {
            $msg .= "$errorBeg E#2175 Organization text does not have an ending < after the domain $errorEnd";
            return $msg;
        }
        if ($debugOrg) $msg .= "found IIend  at index $iiEnd text  is '" . htmlspecialchars(substr($OrgText, $iiEnd, 7)) . "' $eol";
        // a negative offset makes strrpos() search backwards from that position
        $iiStart = strrpos($OrgText, ">", $iiEnd - strlen($OrgText));
        if (false === $iiStart) {
            $msg .= "$errorBeg E#2168 Organization text does not have a > before the domain $errorEnd";
            return $msg;
        }
        if ($debugOrg) $msg .= "found greater than  at index $iiStart $eol";
        if ($debugOrg) $msg .= "bookURL = substr(xxx, $iiStart + 1, $iiEnd - $iiStart); $eol";
        $bookURL = trim(substr($OrgText, $iiStart + 1, $iiEnd - $iiStart - 1));
        if ("http" != substr($bookURL, 0, 4)) {
            $bookURL = "https://" . $bookURL;
        }
        $updateResult = $wpdbExtra->update(
            $wpdbExtra->html,
            array("bookURL" => $bookURL),
            array("htmTrailId" => $trailId),
        );
        if (false === $updateResult) {
            // report the failure instead of claiming the address was stored
            $msg .= "$errorBeg failed to update the bookURL in $wpdbExtra->html $errorEnd";
            $msg .= $wpdbExtra->print_error();
            return $msg;
        }
        return "$msg -- update the bookURL to '" . htmlspecialchars($bookURL) . "' $eol";
    } // end function extractOrganizationInformation
    /**
     * Extract the version details from the version section of a trail page
     * and write them to the html table row of the trail.
     *
     * Fields written, when they can be determined:
     *   oldestSegment - the text between "Oldest segment check" and the next
     *                   period, passed through DateFormat
     *   VersionText   - the text after "Text version <word> ", cut off at
     *                   "Conditions" when that word occurs exactly once
     *   lastEdit      - the first 12 characters of VersionText, passed
     *                   through DateFormat (blank if the formatting throws)
     *
     * @param string $versionText text of the version section
     * @param mixed  $trailId     value matched against htmTrailId to select the row
     * @return string accumulated progress and error messages
     */
    private static function extractVersionInformation($versionText, $trailId)
    {
        global $eol, $errorBeg, $errorEnd;
        global $wpdbExtra;
        $msg = "";
        $debugLastEdit = false;
        $debugVersion = false;
        $debugOldest = false;
        try {
            $updateData = array();
            if (empty($versionText)) {
                $msg .= "$errorBeg Version text 'Text version',
                            'Oldest segment check'is missing or duplicate $errorEnd";
                return $msg;
            }
            if ($debugVersion) $msg .= self::DisplayFound("Extracting from versionText", $versionText);

            // ---- oldest segment check: the date runs up to the next period
            $textEnd = "Oldest segment check";
            $iiStart = strpos($versionText, $textEnd);
            if (false === $iiStart) {
                $msg .= "$errorBeg oldest date is missing or duplicate $errorEnd";
            } else {
                $iiStart = $iiStart + strlen($textEnd); // just after the label
                $oldest = substr($versionText, $iiStart);
                $iiPeriod = strpos($oldest, ".");
                if (false === $iiPeriod) {
                    $msg .= "$errorBeg E#2178 oldest segment check does not have a period $errorEnd";
                    $oldest = "01/01/1900";
                } else {
                    // trim first, so the blank separating the label from the
                    // date does not count toward the length limit below
                    $oldest = trim(substr($oldest, 0, $iiPeriod));
                }
                if (strlen($oldest) > 10) {
                    $msg .= "$errorBeg E#2179 oldest date has too many characters $errorEnd";
                    $oldest = "01/01/1900";
                }
                if ($debugOldest) $msg .= self::DisplayFound("oldest extracted from version all", $oldest);
                $oldest = freewheelingeasy_kml_book_date::DateFormat($oldest, $debugOldest);
                if ($debugOldest) $msg .= self::DisplayFound("oldest extracted from version after format", $oldest);
                $updateData["oldestSegment"] =  $oldest;
            }

            // ---- version text: drop everything from "Conditions" onward.
            // myFind returns -1 (missing) or -2 (duplicate); used as a substr
            // length those values would chop characters off the end instead,
            // so the text is only cut when a real index came back.
            $iiConditions = self::myFind($versionText, "Conditions");
            if ($iiConditions >= 0) {
                $versionText = substr($versionText, 0, $iiConditions);
            }
            if ($debugVersion) $msg .= "upto conditions -- $versionText $eol";

            $textVersion = "Text version ";
            $iiTV = strpos($versionText, $textVersion);
            if (false === $iiTV) {
                // without the label there is nothing reliable to extract
                $msg .= "$errorBeg 'Text version ' was not found in the version text $errorEnd";
            } else {
                // trim before searching, so the index of the blank applies to
                // the very string that is cut with it
                $versionText = trim(substr($versionText, $iiTV + strlen($textVersion)));
                $iiSpace = strpos($versionText, " ");
                if ($iiSpace !== false)
                    $versionText = substr($versionText, $iiSpace + 1);  // after of or from
                $updateData["VersionText"] = trim($versionText);
                $lastEdit = trim(substr($versionText, 0, 12));
                $lastEditFormatted = "";
                try {
                    $lastEditFormatted = freewheelingeasy_kml_book_date::DateFormat($lastEdit, $debugLastEdit);
                } catch (Exception $e) {
                    $msg .= "$errorBeg E#2180 while trying to format the last edit date '$lastEdit'  " . $e->getMessage() . $errorEnd;
                }
                $updateData["lastEdit"] = $lastEditFormatted;
            }

            // nothing extracted means nothing to write
            if (count($updateData) > 0) {
                $msg .= rrwUtil::print_r($updateData, true, "updateData from the version section to trail");
                $wpdbExtra->update($wpdbExtra->html, $updateData, array("htmTrailId" => $trailId));
            }
        } catch (Exception $e) {
            $msg .= "$errorBeg E#2181 " . $e->getMessage() . $errorEnd;
        }
        return $msg;
    } // end function extractVersionInformation
    /**
     * Count the "&gt;&gt;" markers in the buffer and store the count in the
     * unknownCount field of the html table row for the trail.
     *
     * Counting stops at 50. The search resumes 3 bytes past each hit, which
     * is less than the marker length, so markers that overlap (for example
     * three "&gt;" in a row) are each counted.
     *
     * @param string $buffer  text to scan
     * @param mixed  $trailId value matched against htmTrailId
     * @return string the I#2182 count message followed by whatever UpdateSql reports
     */
    private static function UpdateErrorCount($buffer, $trailId)
    {
        global $eol, $errorBeg, $errorEnd;
        $marker = "&gt;&gt;";
        $limit = 50;        // runaway guard
        $showCount = true;
        $errorCount = 0;
        for (
            $at = strpos($buffer, $marker);
            false !== $at && $errorCount < $limit;
            $at = strpos($buffer, $marker, $at + 3)
        ) {
            $errorCount++;
        }
        $msg = "";
        if ($showCount) {
            $msg .= "I#2182 error count is $errorCount $eol";
        }
        return $msg . self::UpdateSql("html", "htmTrailId", $trailId, "unknownCount", $errorCount);
    }
    /**
     * Set a single field on the row(s) of one of the known tables that match
     * a key value, and bump the global $goodCnt counter.
     *
     * @param string $table      "trails" or "html"; selects $wpdbExtra->trails or $wpdbExtra->html
     * @param string $keyField   name of the field used to select the row(s)
     * @param mixed  $keyvalue   value the key field must have (passed on as a string)
     * @param string $field      name of the field to set
     * @param mixed  $fieldValue new value of the field
     * @return string trace text when the debugSql parameter is on, otherwise blank
     * @throws Exception when $table is not one of the known tables
     */
    public static function UpdateSql($table, $keyField, $keyvalue, $field, $fieldValue)
    {
        global $eol, $errorBeg, $errorEnd;
        global $goodCnt;
        global $wpdbExtra;
        $showUpdateSql = rrwParam::Boolean("debugSql") ? true : false;
        $msg = "";
        if ($showUpdateSql) {
            $msg .= "UpdateSql( $table, $keyField, $keyvalue, $field, $fieldValue ) $eol ";
        }
        // translate the short table name to the real table name
        if ("trails" == $table) {
            $finalTable = $wpdbExtra->trails;
        } elseif ("html" == $table) {
            $finalTable = $wpdbExtra->html;
        } else {
            throw new exception("Unknown table of " . $table);
        }
        $result = $wpdbExtra->update(
            $finalTable,
            array("$field" => $fieldValue),
            array("$keyField" => "$keyvalue")
        );
        if ($showUpdateSql) {
            $msg .= " #result $result for update - $field to $fieldValue $eol ";
        }
        $goodCnt++;
        return $msg;
    }
    // next two routines should go into a html parser class
    /**
     * Locate a text string that is expected to occur exactly once.
     *
     * When the string is not found, a notice and the escaped buffer are
     * printed directly to the output.
     *
     * @param string $buffer text to search
     * @param string $what   text to look for
     * @return int index of the single occurrence,
     *             -1 when there is none,
     *             -2 when a second occurrence follows the first
     */
    private static function myFind($buffer, $what)
    {
        global $eol, $errorBeg, $errorEnd;
        $firstAt = strpos($buffer, $what);
        if (false !== $firstAt) {
            // look again, starting just past the first hit
            $secondAt = strpos($buffer, $what, $firstAt + strlen($what));
            return (false === $secondAt) ? $firstAt : -2;
        }
        print "$eol ----------  in buffer did not find '$what' $eol";
        print htmlspecialchars($buffer, ENT_QUOTES) . "$eol ----------------- $eol";
        return -1;
    } // end myFind
    /**
     * Return the text found between two marker strings.
     *
     * Both markers are located with myFind, so each has to occur exactly
     * once in the buffer. The result has every "&#9;" removed and is trimmed.
     *
     * @param string $buffer     text to search
     * @param string $textStart  marker that precedes the wanted text
     * @param string $textEnd    marker that follows the wanted text
     * @param bool   $removeSpan when true, drop everything up to and including
     *                           the first ">" and everything from the
     *                           following "<" onward
     * @param bool   $debug      when true, print progress information
     * @return string the text between the markers, or blank when a marker is
     *                missing or duplicated, or the end marker does not come
     *                after the start marker
     */
    private static function myFindTextBetween($buffer,  $textStart,   $textEnd,  $removeSpan = false, $debug = false)
    {
        global $eol, $errorBeg, $errorEnd;
        if ($debug) print "TextBetween( $textStart, $textEnd,  $removeSpan";
        $foundStart = self::myFind($buffer, $textStart);
        if ($debug) print "look found start - '$foundStart' ...";
        if ($foundStart < 0) {
            return "";
        }
        $iiEnd = self::myFind($buffer, $textEnd);
        if ($debug) print "found end - '$iiEnd' $eol";
        if ($iiEnd < 0)
            return "";
        // the wanted text begins directly after the start marker; a blank
        // that may separate them is removed by the trim at the end
        $iiStart = $foundStart + strlen($textStart);
        if ($iiEnd < $iiStart)
            return "";      // end marker is not after the start marker
        $finalText = substr($buffer, $iiStart, $iiEnd - $iiStart);
        if ($debug) print "complete text found -- $finalText $eol";
        if ($removeSpan) {
            // only cut when the bracket is really there; a failed strpos
            // must not be used as an index
            $iiCarrot = strpos($finalText, ">");
            if (false !== $iiCarrot)
                $finalText = substr($finalText, $iiCarrot + 1);
            $iiCarrot = strpos($finalText, "<");
            if (false !== $iiCarrot)
                $finalText = substr($finalText, 0, $iiCarrot);
        }
        if ($debug) print "after remove span -- $finalText $eol";
        $finalText = str_replace("&#9;", "", $finalText); // remove tabs
        $finalText = trim($finalText);
        return $finalText;
    } //   end   findTextBetween
}//end class freewheeling_Extract_Data_From_HtmlFile
