/*
ssc (static site checker)
Copyright (c) 2020 Dylan Harris
https://dylanharris.org/

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public Licence as published by
the Free Software Foundation, either version 3 of the Licence,  or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public Licence for more details.

You should have received a copy of the GNU General Public
Licence along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include "symbol.h"
#include "context.h"
#include "element_myhtml.h"
#include "quote.h"

/*
::std::string get_test (myhtml_tree_node_t* node)
{   mycore_string_raw_t tree_buffer;
    tree_buffer.length = 0;
    tree_buffer.size = 0;
    tree_buffer.data = nullptr;
    myhtml_serialization_tree_buffer (node, &tree_buffer);
    mycore_string_t* str = myhtml_node_string (node);
    size_t len = 0, len2 = 0;
    const char * sz = myhtml_node_text (node, &len);
    void* p = myhtml_node_get_data (node);
    myhtml_token_node_t* t =  myhtml_node_token (node);
    myhtml_namespace_t ns = myhtml_node_namespace (node);
    myhtml_tree_t* tree = myhtml_node_tree (node);
    myhtml_position_t pos = myhtml_node_raw_position (node);
    myhtml_position_t pos3 = myhtml_node_element_position (node);
    myhtml_tag_id_t tag = myhtml_node_tag_id (node);
    bool b = myhtml_node_is_void_element (node);
    bool bclose = myhtml_node_is_close_self (node);


    myhtml_token_node_t* token = myhtml_node_token (node);
    myhtml_position_t pos2 = myhtml_token_node_raw_position (token);
    const char* szt = myhtml_token_node_text (token, &len2);
    mycore_string_t* str2 = myhtml_token_node_string (token);
    myhtml_tag_id_t tag2 = myhtml_token_node_tag_id (token);

    ::std::string res (tree_buffer.data, pos3.length);
    return ::std::string (); }
*/

// Serialise node (with its subtree) and derive two pieces of text from it:
//   open  - the leading part of the serialisation taken to be the opening tag;
//   close - the text from the last '<' onwards, taken to be the closing tag.
// The length reported by myhtml_node_element_position is used as a first guess
// at where the opening tag ends. Neither output is cleared first: close is only
// written when a closing tag is found, and nothing is written at all if
// copying the serialisation throws.
void get_element_text (myhtml_tree_node_t* node, ::std::string& open, ::std::string& close)
{   mycore_string_raw_t raw;
    raw.data = nullptr;
    raw.size = 0;
    raw.length = 0;
    ::std::string html;
    try
    {   myhtml_serialization_tree_buffer (node, &raw);
        html = ::std::string (raw.data, raw.length); }
    catch (...)
    {   // best effort: release the serialisation buffer and leave the outputs alone
        mycore_string_raw_destroy (&raw, false);
        return; }
    mycore_string_raw_destroy (&raw, false);

    const myhtml_position_t element = myhtml_node_element_position (node);
    const size_t guess = element.length;
    if ((guess == 0) || (guess > html.length ()))
        open = html;    // no usable length: hand back the whole serialisation
    else if (html [guess-1] == '>')
        open = html.substr (0, guess);  // the guess lands exactly on the end of a tag
    else
    {   // otherwise extend to the next '>', unless there is none or it is the final character
        const size_t gt = html.find ('>', guess);
        const bool usable = (gt != html.npos) && (gt < html.length () - 1);
        if (usable) open = html.substr (0, gt+1);
        else open = html; }

    if (myhtml_node_is_close_self (node)) return;
    const size_t lt = html.rfind ('<');
    if ((lt == html.npos) || (lt <= 1)) return; // a '<' at the very start is the opening tag, not a closing one
    close = html.substr (lt); }

// Return the key string of the node's first attribute, or an empty string when
// the node has no attribute or that key string is missing or empty.
::std::string get_first_attribute_text (myhtml_tree_node_t* node)
{   myhtml_tree_attr_t* const first = myhtml_node_attribute_first (node);
    if (first == nullptr) return ::std::string ();
    mycore_string_t* const key = myhtml_attribute_key_string (first);
    if ((key == nullptr) || (key -> length == 0)) return ::std::string ();
    return ::std::string (key -> data, key -> length); }

// Extract the tag name from the opening-tag text produced by get_element_text.
// The raw position's begin, relative to the element position's begin, gives the
// offset into that text, and the raw position's length gives the number of
// characters to take. Returns an empty string when the raw length is zero or
// the offset lies outside the opening-tag text.
::std::string get_raw_tag_name (myhtml_tree_node_t* node)
{   const myhtml_position_t raw = myhtml_node_raw_position (node);
    const myhtml_position_t element = myhtml_node_element_position (node);
    if (raw.length == 0) return ::std::string ();
    const size_t offset = raw.begin - element.begin;
    ::std::string open, close;
    get_element_text (node, open, close);
    if (offset >= open.length ()) return ::std::string ();
    return open.substr (offset, raw.length); }

// Return a copy of the string attached to the node's token, or an empty string
// when the node has no token or the token string is missing or empty.
::std::string get_token_text (myhtml_tree_node_t* node)
{   myhtml_token_node_t* const token = myhtml_node_token (node);
    if (token == nullptr) return ::std::string ();
    mycore_string_t* const text = myhtml_token_node_string (token);
    if ((text == nullptr) || (text -> length == 0)) return ::std::string ();
    return ::std::string (text -> data, text -> length); }

// Return the serialisation of node (and its subtree) as produced by
// myhtml_serialization_tree_buffer, or an empty string when no buffer was
// produced.
// The serialiser allocates tree_buffer.data; it is released here with
// mycore_string_raw_destroy (as get_element_text does) once the text has been
// copied, so the buffer is not leaked on either the normal or the exception path.
::std::string get_raw_text (myhtml_tree_node_t* node)
{   mycore_string_raw_t tree_buffer;
    tree_buffer.length = 0;
    tree_buffer.size = 0;
    tree_buffer.data = nullptr;
    ::std::string res;
    try
    {   myhtml_serialization_tree_buffer (node, &tree_buffer);
        if (tree_buffer.data != nullptr) res.assign (tree_buffer.data, tree_buffer.length); }
    catch (...)
    {   // free the buffer, then let the caller see the failure exactly as before
        mycore_string_raw_destroy (&tree_buffer, false);
        throw; }
    // false: tree_buffer itself is a local, only its data is to be freed
    mycore_string_raw_destroy (&tree_buffer, false);
    return res; }

// Return a copy of the string myhtml_node_string reports for the node, or an
// empty string when there is none or it has zero length.
::std::string get_html_string (myhtml_tree_node_t* node)
{   mycore_string_t* const text = myhtml_node_string (node);
    const bool usable = (text != nullptr) && (text -> length > 0);
    if (! usable) return ::std::string ();
    return ::std::string (text -> data, text -> length); }

// Return the node's text as reported by myhtml_node_text. When the reported
// length is zero the placeholder "()" is returned instead of an empty string.
::std::string get_text (myhtml_tree_node_t* node)
{   size_t length = 0;
    const char* const text = myhtml_node_text (node, &length);
    return (length == 0) ? ::std::string ("()") : ::std::string (text, length); }

// Concatenate the content of node's children, recursing through the subtree.
//   - text nodes contribute get_text (child);
//   - the elements listed in the switch below keep their opening/closing tag
//     text unless `text` is set;
//   - every other element keeps its tag text only when `anything` is set.
// Whether or not an element's own tags are kept, its children are always
// walked. A null node yields an empty string.
::std::string walk_children_and_filter (myhtml_tree_node_t* node, bool text, bool anything)
{   ::std::string res;
    if (node == nullptr) return res;
    for (   myhtml_tree_node_t* child = myhtml_node_child (node);
            child != nullptr;
            child = myhtml_node_next (child))
    {   const myhtml_tag_id_t id = myhtml_node_tag_id (child);
        if (id == MyHTML_TAG__TEXT)
        {   res += get_text (child);
            continue; }

        // elements outside the list follow `anything`; listed ones follow `text`
        bool keep_markup = anything;
        switch (id)
        {   case MyHTML_TAG_ABBR :
            case MyHTML_TAG_ADDRESS :
            case MyHTML_TAG_B :
            case MyHTML_TAG_BLOCKQUOTE :
            case MyHTML_TAG_BR :
            case MyHTML_TAG_CITE :
            case MyHTML_TAG_CODE :
            case MyHTML_TAG_DFN :
            case MyHTML_TAG_DIV :
            case MyHTML_TAG_DD :
            case MyHTML_TAG_DL :
            case MyHTML_TAG_DT :
            case MyHTML_TAG_EM :
            case MyHTML_TAG_I :
            case MyHTML_TAG_KBD :
            case MyHTML_TAG_LI :
            case MyHTML_TAG_MARK :
            case MyHTML_TAG_OL :
            case MyHTML_TAG_P :
            case MyHTML_TAG_PRE :
            case MyHTML_TAG_Q :
            case MyHTML_TAG_RB :
            case MyHTML_TAG_RUBY :
            case MyHTML_TAG_RP :
            case MyHTML_TAG_RT :
            case MyHTML_TAG_RTC :
            case MyHTML_TAG_SMALL :
            case MyHTML_TAG_STRONG :
            case MyHTML_TAG_S :
            case MyHTML_TAG_SAMP :
            case MyHTML_TAG_SUB :
            case MyHTML_TAG_SUP :
            case MyHTML_TAG_TIME :
            case MyHTML_TAG_U :
            case MyHTML_TAG_UL :
            case MyHTML_TAG_VAR :
            case MyHTML_TAG_WBR :
                keep_markup = ! text;
                break;
            default :
                break; }

        const ::std::string inner (walk_children_and_filter (child, text, anything));
        if (! keep_markup)
        {   res += inner;
            continue; }
        ::std::string open, close;
        get_element_text (child, open, close);
        res += open;
        res += inner;
        res += close; }
    return res; }

// Return the filtered content of this element's children; `text` and `anything`
// are passed straight through to walk_children_and_filter.
// The element must not be invalid (asserted).
::std::string myhtml_element::content (bool text, bool anything) const
{   assert (! invalid ());
    ::std::string filtered (walk_children_and_filter (node_, text, anything));
    return filtered; }

// Return this element's tag id, caching it in tag_ on first use.
// An id at or beyond MyHTML_TAG__END_OF_FILE is reported as
// MyHTML_TAG__END_OF_FILE and is not cached.
// The element must not be invalid (asserted).
myhtml_tag_id_t myhtml_element::tag () const
{   assert (! invalid ());
    if (tag_ == MyHTML_TAG__UNDEF)
    {   const myhtml_tag_id_t found = myhtml_node_tag_id (node_);
        if (found >= MyHTML_TAG__END_OF_FILE) return MyHTML_TAG__END_OF_FILE;
        tag_ = found;
        return found; }
    return tag_; }

// Describe the subtree rooted at node, one line per node: an indent of
// STEPSPACES * depth spaces, the node's tag id in hexadecimal, a colon, then
// the hexadecimal tag ids of its immediate children. The lines for each child's
// own subtree follow, one level deeper. A null node yields an empty string.
::std::string rpt_structure (myhtml_tree_node_t* node, const int depth)
{   if (node == nullptr) return ::std::string ();
    ::std::ostringstream line;
    ::std::string subtrees;
    line << ::std::hex << ::std::string (STEPSPACES * depth, ' ') << myhtml_node_tag_id (node) << ":";
    for (myhtml_tree_node_t* child = myhtml_node_child (node); child != nullptr; child = myhtml_node_next (child))
    {   line << " " << myhtml_node_tag_id (child);
        // gather the deeper lines now, emit them after this node's line is complete
        subtrees += rpt_structure (child, depth+1); }
    line << "\n" << subtrees;
    return line.str (); }

// Describe the structure of a whole tree, starting from its document node.
// No depth is passed to the node overload, so its default (declared outside
// this file's visible text) applies.
::std::string rpt_structure (myhtml_tree_t* tree)
{   myhtml_tree_node_t* const document = myhtml_tree_get_document (tree);
    return rpt_structure (document); }

// Produce a one-line report for this element, indented by STEPSPACES*n spaces.
// Nothing is reported unless context.tell (e_structure) holds. The line holds
// the tag id; with e_detail it also holds the node pointer; with e_splurge as
// well it adds the quoted node text, opening tag, closing tag and raw tag name.
::std::string myhtml_element::report (const int n) const
{   if (! context.tell (e_structure)) return ::std::string ();
    ::std::ostringstream line;
    line << ::std::string (STEPSPACES*n, ' ') << tag ();
    if (context.tell (e_detail))
    {   line << ' ' << node_;
        if (context.tell (e_splurge))
        {   ::std::string open, close;
            get_element_text (node_, open, close);
            line << ": " << quote (get_text (node_));
            line << " (" << quote (open) << ", " << quote (close);
            line << ", " << quote (get_raw_tag_name (node_)) << ")"; } }
    line << "\n";
    return line.str (); }

// Return the tag name as get_raw_tag_name extracts it for this element's node
// (no case conversion is applied here).
::std::string myhtml_element::get_raw_name () const
{   ::std::string raw (get_raw_tag_name (node_));
    return raw; }

// Return the upper-cased raw tag name, computing and caching it in name_ the
// first time it is needed. An empty result is not distinguishable from "not yet
// computed", so it is recomputed on each call.
::std::string myhtml_element::name ()
{   if (! name_.empty ()) return name_;
    name_ = ::boost::algorithm::to_upper_copy (get_raw_tag_name (node_));
    return name_; }
