Kirjautuminen

Haku

Tehtävät

Keskustelu: Ohjelmointikysymykset: Kuinka C++ lukisi taulukon netistä

Sivun loppuun

E.T. [16.05.2011 11:32:02]

#

Tarkoituksena olisi käyttää c++:lla nettisivulla olevaa taulukkoa.
Siinä on kilpailun osallistuja ja arvo, eli ihan vaan 2d taulukko.
Saan sen leikepöydälle (Firefox ja lisäosa) manuaalisesti, mutta
kuinka saisin sen tehtyä ohjelmallisesti?

Daih [16.05.2011 11:40:31]

#

Hei!

Sinun tulisi lukea tuo kyseinen nettisivusi käyttäen jotain valmista kirjastoa (esim. SDL_net tai SFML) ja sitten parsia kyseisen taulukon tiedot sivulta c++ taulukkoon.

neau33 [16.05.2011 14:37:40]

#

Moi E.T.

Mikäli käyttis on Windows niin käytä WinInet API'a (wininet.h), netti on ohjetta pullollaan

os [16.05.2011 17:37:04]

#

libcurl voisi olla varteenotettava porttautuva vaihtoehto. Tähän (C-kirjastoon) näyttäisi löytyvän myös joku C++-wrapperi, jota kannattanee myös kokeilla. HTTP:tä alemmilla protokollatasoilla puljaamista kannattaa välttää. Taulukon saat ongittua HTML:stä kivuttomimmin jonkinlaisen XML-parserin (esim. http://xerces.apache.org/xerces-c/) avulla.

(Mod. edit: Jottei tyylikäs vastaus hukkuisi neau33:n koodien sekaan, tässä on siihen linkki.)

neau33 [17.05.2011 09:53:49]

#

Moi taas!

tässä olisi viritelmä, jolla poimia web-sivulta <table></table> tagit sisältöineen ja tallennella .xml tiedostoksi, mutta, mutta...

// ReadTableFromPage.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <windows.h>
#include <wininet.h>
#pragma comment(lib,"wininet.lib")
#include <iostream>
#include <fstream>
#include <string>
#import <msxml6.dll> named_guids rename_namespace("MyMSXML")
#pragma comment(lib,"msxml6.lib")

using namespace std;
using namespace MyMSXML;

int main(int argc, char* argv[])
{

    try
    {
        remove("c:/testi.xml");
    }
    catch(...){}

    char Buf[1024]; DWORD ReadSize;
    BOOL bResult; string str;

    HINTERNET hInternet = InternetOpenA(
    "UsewrAgent/1.0",INTERNET_OPEN_TYPE_PRECONFIG,
    NULL,NULL,0);
    if(!hInternet)
    {
        printf("hInternet Failed!\n");
        return -1;
    }

    HINTERNET hConnection = InternetConnectA(
    hInternet,
    "http://www.elisanet.fi/nea.fi/tabletest/",
    INTERNET_DEFAULT_HTTPS_PORT,
    "","", INTERNET_SERVICE_HTTP,0,0);
    if (!hConnection)
    {
        InternetCloseHandle(hInternet);
        printf("InternetConnectA failed!\n");
        return -1;
    }

    HINTERNET hFile = InternetOpenUrl(
    hInternet,
    "http://www.elisanet.fi/nea.fi/tabletest/",
    NULL,0,INTERNET_FLAG_RELOAD,0);
    if(!hFile)
    {
        InternetCloseHandle(hConnection);
        InternetCloseHandle(hInternet);
        printf("InternetOpenUrl Failed!\n");
        return -1;
    }

    for(;;)
    {
        ReadSize = 1024;

        bResult = InternetReadFile(
        hFile,Buf,1024,&ReadSize);

        if(bResult && (ReadSize == 0)) break;

        Buf[ReadSize] = '\0';
        str+=("%s", Buf);
    }

    InternetCloseHandle(hFile);
    InternetCloseHandle(hConnection);
    InternetCloseHandle(hInternet);

    size_t ltag;
    size_t utag;
    ltag = str.find("</table>");
    utag = str.find("</TABLE>");

    string stag; string etag;

    if(int(ltag) > -1)
    {
        stag = "<table";
        etag = "/table";
    }
    else if(int(ltag) < 0 && int(utag) > -1)
    {
        stag = "<TABLE";
        etag = "/TABLE";
    }
    else if(int(ltag) == -1 && int(utag) == -1)
    {
        cout << "Page Requested Has No HTML Table!\n";
        return -1;
    }

    size_t spos; spos = str.find(stag);
    size_t epos; epos = str.find(etag);

    spos -= 1; epos += 7; string xmlstr;

    xmlstr = "<?xml version=\"1.0 encoding=\"ISO-8859-1\" ?>\n";
    xmlstr +=  str.substr(spos, epos-spos);
    cout << xmlstr << endl;
    ofstream xmlfile;
    xmlfile.open ("c:/testi.xml");
    xmlfile << xmlstr;
    xmlfile.close();

    IXMLDOMDocument *pDoc=NULL;
    IXMLDOMNodeList* pDomNodeList=NULL;

    //try
    //{
        //*** jutska kääntyy hienosti,
        //(Microsoft Visual C++ 6.0)
        //mutta tässä tökkää kun ohjelmaa ajetaan
        //(tökkää myös kvalidilla .XML -tiedostolla)

        pDoc->load("c:/test.xml");

        //*** elikäs olisko jollain jotain ratkaisua?

    //}
    //catch(...){}

    BSTR bstr = ::SysAllocString(L"<tr>");
    BSTR bstr2 = ::SysAllocString(L"<td>");
    IXMLDOMNodePtr nodeptr;

    return 0;
}

virhekuvauksia löytyy täältä ja täältä

punppis [18.05.2011 14:30:05]

#

TinyXML:llä onnistunee varmasti myös tuo parsiminen melko helposti.

neau33 [18.05.2011 21:29:45]

#

Heippa taas E.T.!

elikä siis näin lukisit taulukon nettisivulta ja pukkaisit XML Documenttiin Microsoft Visual C++ 6.0/WinInet API/MSXML6 ympäristössä...

// ReadTableFromPage.cpp : Defines the entry point for the console application.
//
#define UNICODE

#include "stdafx.h"
#include <iostream>
#include <fstream>
#include <string>
#include <atlbase.h>
#include <atlconv.h>
#include <comdef.h>
#include <windows.h>
#include <wininet.h>
#pragma comment(lib,"wininet.lib")
#import <msxml6.dll> named_guids
#pragma comment(lib,"msxml6.lib")

using namespace std;
using namespace MSXML2;

int main(int argc, char* argv[])
{
   USES_CONVERSION;

   char Buf[1024]; DWORD ReadSize;
   BOOL bResult; string str;

   HINTERNET hInternet = InternetOpenA(
   "UsewrAgent/1.0",INTERNET_OPEN_TYPE_PRECONFIG,
   NULL,NULL,0);
   if(!hInternet)
   {
      printf("hInternet Failed!\n");
      return -1;
   }

   HINTERNET hConnection = InternetConnectA(
   hInternet,
   "http://www.elisanet.fi/nea.fi/tablex",
   INTERNET_DEFAULT_HTTPS_PORT,
   "","", INTERNET_SERVICE_HTTP,0,0);

   if (!hConnection)
   {
      InternetCloseHandle(hInternet);
      printf("InternetConnectA failed!\n");
      return -1;
   }

   HINTERNET hFile = InternetOpenUrl(
   hInternet,
   "http://www.elisanet.fi/nea.fi/tablex",
   NULL,0,INTERNET_FLAG_RELOAD,0);
   if(!hFile)
   {
      InternetCloseHandle(hConnection);
      InternetCloseHandle(hInternet);
      printf("InternetOpenUrl Failed!\n");
      return -1;
   }

   for(;;)
   {
      ReadSize = 1024;

      bResult = InternetReadFile(
      hFile,Buf,1024,&ReadSize);

      if(bResult && (ReadSize == 0)) break;

      Buf[ReadSize] = '\0'; str+=("%s", Buf);
   }

   InternetCloseHandle(hFile);
   InternetCloseHandle(hConnection);
   InternetCloseHandle(hInternet);

   size_t ltag; size_t utag;
   ltag = str.find("</table>");
   utag = str.find("</TABLE>");

   string stag; string etag;

   if(int(ltag) > -1)
   {
      stag = "<table"; etag = "/table";
   }
   else if(int(ltag) < 0 && int(utag) > -1)
   {
      stag = "<TABLE"; etag = "/TABLE";
   }
   else if(int(ltag) == -1 && int(utag) == -1)
   {
      printf("Requested page has no table element!\n");
      return -1;
   }

   size_t spos; spos = str.find(stag);
   size_t epos; epos = str.find(etag);

   spos -= 1; epos += 7; string xmlstr;

   xmlstr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
   xmlstr +=  str.substr(spos, epos-spos);

   CComBSTR cbstrData(xmlstr.c_str());
   BSTR bstrXml = cbstrData.Copy();

   SysFreeString(cbstrData); xmlstr="";

   HRESULT hr;

   hr = CoInitialize(NULL);
   if(FAILED (hr))
   {
      printf("HRESULT Initialization failed!\n");
      SysFreeString(bstrXml);
      return -1;
   }

   IXMLDOMDocumentPtr xmlDoc = NULL;
   xmlDoc.CreateInstance(__uuidof(DOMDocument));
   if(xmlDoc==NULL)
   {
      printf("DOMDocument creation failed!\n");
      SysFreeString(bstrXml);
      return -1;
   }


   xmlDoc->put_async(VARIANT_TRUE);
   xmlDoc->put_preserveWhiteSpace(VARIANT_TRUE);
   xmlDoc->put_validateOnParse(VARIANT_FALSE);

   VARIANT_BOOL _result;
   hr = xmlDoc->raw_loadXML(bstrXml, &_result);
   if(FAILED (hr))
   {
      printf("Loading .xml data failed!\n");
      SysFreeString(bstrXml);
      return -1;
   }

   _bstr_t xmldata;
   xmldata = xmlDoc->Getxml();

   string chkstr = OLE2CA(xmldata);

   if(chkstr != "")
   {
      SysFreeString(bstrXml);
      printf(OLE2CA(xmldata));
      printf("\n\nXML data successfully loaded to IXMLDocument object!\n");
      SysFreeString(xmldata);
   }
   else
   {
      SysFreeString(xmldata);
      printf(OLE2CA(bstrXml));
      printf("\n\nXML data still not loaded into XMLDocument!\n");
      SysFreeString(bstrXml);
   }

   return 0;
}

neau33 [18.05.2011 21:42:41]

#

Heippa taas E.T.!

elikä siis näin lukisit taulukon nettisivulta ja pukkaisit XML Documenttiin Microsoft Visual C++ 6.0/WinInet API/MSXML6 ympäristössä...

// ReadTableFromPage.cpp : Defines the entry point for the console application.
//
#define UNICODE

#include "stdafx.h"
#include <iostream>
#include <fstream>
#include <string>
#include <atlbase.h>
#include <atlconv.h>
#include <comdef.h>
#include <windows.h>
#include <wininet.h>
#pragma comment(lib,"wininet.lib")
#import <msxml6.dll> named_guids
#pragma comment(lib,"msxml6.lib")

using namespace std;
using namespace MSXML2;

int main(int argc, char* argv[])
{
   USES_CONVERSION;

   char Buf[1024]; DWORD ReadSize;
   BOOL bResult; string str;

   HINTERNET hInternet = InternetOpenA(
   "UsewrAgent/1.0",INTERNET_OPEN_TYPE_PRECONFIG,
   NULL,NULL,0);
   if(!hInternet)
   {
      printf("hInternet Failed!\n");
      return -1;
   }

   HINTERNET hConnection = InternetConnectA(
   hInternet,
   "http://www.elisanet.fi/nea.fi/tablex",
   INTERNET_DEFAULT_HTTPS_PORT,
   "","", INTERNET_SERVICE_HTTP,0,0);

   if (!hConnection)
   {
      InternetCloseHandle(hInternet);
      printf("InternetConnectA failed!\n");
      return -1;
   }

   HINTERNET hFile = InternetOpenUrl(
   hInternet,
   "http://www.elisanet.fi/nea.fi/tablex",
   NULL,0,INTERNET_FLAG_RELOAD,0);
   if(!hFile)
   {
      InternetCloseHandle(hConnection);
      InternetCloseHandle(hInternet);
      printf("InternetOpenUrl Failed!\n");
      return -1;
   }

   for(;;)
   {
      ReadSize = 1024;

      bResult = InternetReadFile(
      hFile,Buf,1024,&ReadSize);

      if(bResult && (ReadSize == 0)) break;

      Buf[ReadSize] = '\0'; str+=("%s", Buf);
   }

   InternetCloseHandle(hFile);
   InternetCloseHandle(hConnection);
   InternetCloseHandle(hInternet);

   size_t ltag; size_t utag;
   ltag = str.find("</table>");
   utag = str.find("</TABLE>");

   string stag; string etag;

   if(int(ltag) > -1)
   {
      stag = "<table"; etag = "/table";
   }
   else if(int(ltag) < 0 && int(utag) > -1)
   {
      stag = "<TABLE"; etag = "/TABLE";
   }
   else if(int(ltag) == -1 && int(utag) == -1)
   {
      printf("Requested page has no table element!\n");
      return -1;
   }

   size_t spos; spos = str.find(stag);
   size_t epos; epos = str.find(etag);

   spos -= 1; epos += 7; string xmlstr;

   xmlstr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
   xmlstr +=  str.substr(spos, epos-spos);

   CComBSTR cbstrData(xmlstr.c_str());
   BSTR bstrXml = cbstrData.Copy();

   SysFreeString(cbstrData); xmlstr="";

   HRESULT hr;

   hr = CoInitialize(NULL);
   if(FAILED (hr))
   {
      printf("HRESULT Initialization failed!\n");
      SysFreeString(bstrXml);
      return -1;
   }

   IXMLDOMDocumentPtr xmlDoc = NULL;
   xmlDoc.CreateInstance(__uuidof(DOMDocument));
   if(xmlDoc==NULL)
   {
      printf("DOMDocument creation failed!\n");
      SysFreeString(bstrXml);
      return -1;
   }


   xmlDoc->put_async(VARIANT_TRUE);
   xmlDoc->put_preserveWhiteSpace(VARIANT_TRUE);
   xmlDoc->put_validateOnParse(VARIANT_FALSE);

   VARIANT_BOOL _result;
   hr = xmlDoc->raw_loadXML(bstrXml, &_result);
   if(FAILED (hr))
   {
      printf("Loading .xml data failed!\n");
      SysFreeString(bstrXml);
      return -1;
   }

   _bstr_t xmldata;
   xmldata = xmlDoc->Getxml();

   string chkstr = OLE2CA(xmldata);

   if(chkstr != "")
   {
      SysFreeString(bstrXml);
      printf(OLE2CA(xmldata));
      printf("\n\nXML data successfully loaded to IXMLDocument object!\n");
      SysFreeString(xmldata);
   }
   else
   {
      SysFreeString(xmldata);
      printf(OLE2CA(bstrXml));
      printf("\n\nXML data still not loaded into XMLDocument!\n");
      SysFreeString(bstrXml);
   }

   return 0;
}

neau33 [18.05.2011 23:01:20]

#

Heippa taas!

tässä vielä sama jutska ilman WinInet API'a...

// XMLHTTPRequestSample.cpp : Defines the entry point for the console application.
//
#define UNICODE

#include "stdafx.h"
#include <iostream>
#include <string>
#include <atlbase.h>
#include <windows.h>

#import <msxml6.dll> named_guids
#pragma comment(lib,"msxml6.lib")

using namespace std;
using namespace MSXML2;

int main(int argc, char* argv[])
{

   USES_CONVERSION;

   HRESULT hr;
   hr = CoInitialize(NULL);

   if(FAILED (hr))
	{
      printf("HRESULT Initialization failed!\n");
      return -1;
   }

   bstr_t sUrl = "http://www.elisanet.fi/nea.fi/tablex/index.html";

   IXMLHTTPRequestPtr xmlRequest = NULL;

   xmlRequest.CreateInstance(__uuidof(XMLHTTP60));

   if(xmlRequest==NULL)
   {
      printf("XMLHTTPRequest object creation failed!\n");
      return -1;
   }

   hr = xmlRequest->open(_bstr_t(_T("POST")), sUrl, VARIANT_FALSE);
   f(FAILED (hr))
   {
      cout << "XMLRequest open failed!" << endl;
      return -1;
   }
   xmlRequest->setRequestHeader((bstr_t)"Content-Type", (bstr_t)"text/xml");
   xmlRequest->send();

   if(200 == xmlRequest->status)
   {
      string rawstr = W2A(xmlRequest->responseText);

      size_t ltag; size_t utag;
      ltag = rawstr.find("</table>");
      utag = rawstr.find("</TABLE>");
      string stag; string etag;

      if(int(ltag) > -1)
      {
      	stag = "<table"; etag = "/table";
      }
      	else if(int(ltag) < 0 && int(utag) > -1)
      {
      	stag = "<TABLE"; etag = "/TABLE";
      }
      else if(int(ltag) == -1 && int(utag) == -1)
      {
      	printf("Requested page has no table element!\n");
      	return -1;
      }

      string rTag = stag.erase(0,1);
      size_t spos; spos = rawstr.find(stag);
      size_t epos; epos = rawstr.find(etag);
      spos -= 1; epos += 7; string xmlstr;

      xmlstr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
      xmlstr +=  rawstr.substr(spos, epos-spos);

      CComBSTR cbstrData(xmlstr.c_str());
      BSTR bstrXml = cbstrData.Copy();
      SysFreeString(cbstrData);
      rawstr=""; xmlstr="";

      CComBSTR cbstrTag(rTag.c_str());
      BSTR bstrTag = cbstrTag;
      SysFreeString(cbstrTag);

      IXMLDOMDocumentPtr xmlDoc = NULL;
      xmlDoc.CreateInstance(__uuidof(DOMDocument));
      if(xmlDoc==NULL)
      {
         printf("DOMDocument creation failed!\n");
         SysFreeString(bstrXml);
         return -1;
      }

      xmlDoc->put_async(VARIANT_TRUE);
      xmlDoc->put_preserveWhiteSpace(VARIANT_TRUE);
      xmlDoc->put_validateOnParse(VARIANT_FALSE);

      VARIANT_BOOL _result;

      xmlDoc->raw_loadXML(bstrXml, &_result);
      ::SysFreeString(bstrXml);

      if(!_result)
      {
         cout << "Loading XML data failed!" << endl;
         return -1;
      }
      IXMLDOMNodePtr xmlNode;

      cout << "Here's the whole XML:" << endl;
      cout << W2A(xmlDoc->xml) << endl;

      cout << "And here's the table:" << endl;
      xmlNode = xmlDoc->selectSingleNode(bstrTag);
      cout << W2A(xmlNode->xml) << endl;

   }

   else
   {
      printf(_T("\nError: %s\n"), W2A(xmlRequest->statusText));
   }

   return 0;
}

os [19.05.2011 15:38:10]

#

... ja tässä kokonaan ilman Windowsia :), käyttäen cURLpp- ja TinyXML-kirjastoja. Ikävien C-(tyylisten) rajapintojen käsittelyltä voi monesti välttyä käyttämällä muiden tekemiä valmiita wrappereita.

#include <curlpp/Options.hpp>
#include <tinyxml.h>
#include <sstream>

int main()
{
	try
	{
		// Alustetaan cURLpp (RAII)
		curlpp::Cleanup curlppHandle;

		// Luetaan X(HT)ML-data puskuriin
		std::stringstream bufstream;
		bufstream << curlpp::options::Url("http://osoite");

		// Parsitaan XML-data
		TiXmlDocument doc;
		bufstream >> doc;

		// ... hae tarvittava tieto XML-tietorakenteesta "doc"
	}
	catch(curlpp::RuntimeError & e)
	{
		// cURL:in virheilmoitukset
		std::cerr << e.what() << std::endl;
	}

	return 0;
}

Työläin osa on taulukon parsiminen haluamaasi muotoon puumaisesta XML-tietorakenteesta. Tämä operaatio puuttuu sekä tästä että neau33:n koodista.

EDIT: neau33, koodisi ei muuten myöskään löydä koko taulukko-osaa, jos sen sisällä on toisia taulukoita.

neau33 [19.05.2011 16:40:54]

#

Heippa taas!

No oli miten oli, mutta tässä vielä samaa paskaa VC++.NET ympäristössä...

// XMLHTTPRequestSample_VC++NET.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <iostream>
#include <atlbase.h>
#include <windows.h>
#import <msxml6.dll> named_guids
#pragma comment(lib,"msxml6.lib")

#pragma warning( disable: 4018 4786)
#include <string>
#pragma warning( pop )

using namespace std;
using namespace MSXML2;

int _tmain(int argc, _TCHAR* argv[])
{
   USES_CONVERSION;
   HRESULT hr;
   hr = CoInitialize(NULL);

   if(FAILED (hr))
   {
      printf("HRESULT Initialization failed!\n");
      return -1;
   }

   bstr_t sUrl = "http://www.elisanet.fi/nea.fi/tablex/index.html";

   IXMLHTTPRequestPtr xmlRequest = NULL;

   xmlRequest.CreateInstance(__uuidof(XMLHTTP60));
   if(xmlRequest==NULL)
   {
      printf("XMLHTTPRequest object creation failed!\n");
      return -1;
   }

   hr = xmlRequest->open(_bstr_t(_T("POST")), sUrl, VARIANT_FALSE);
   if(FAILED (hr))
   {
      cout << "XMLRequest open failed!" << endl;
      return -1;
   }

   xmlRequest->setRequestHeader((bstr_t)"Content-Type", (bstr_t)"text/xml");
   xmlRequest->send();

   if(200 == xmlRequest->status)
   {
      string rawstr = W2A(xmlRequest->responseText);

      size_t ltag; size_t utag;
      ltag = rawstr.find("</table>");
      utag = rawstr.find("</TABLE>");

      string stag; string etag;

      if(int(ltag) > -1)
      {
         stag = "<table"; etag = "/table";
      }
         else if(int(ltag) < 0 && int(utag) > -1)
      {
         stag = "<TABLE"; etag = "/TABLE";
      }
         else if(int(ltag) == -1 && int(utag) == -1)
      {
         printf("Requested page has no table element!\n");
         return -1;
      }

      string rTag = stag.erase(0,1);
      size_t spos; spos = rawstr.find(stag);
      size_t epos; epos = rawstr.find(etag);
      spos -= 1; epos += 7; string xmlstr;

      xmlstr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
      xmlstr +=  rawstr.substr(spos, epos-spos);

      CComBSTR cbstrData(xmlstr.c_str());
      BSTR bstrXml = cbstrData.Copy();
      SysFreeString(cbstrData);
      rawstr=""; xmlstr="";

      CComBSTR cbstrTag(rTag.c_str());
      BSTR bstrTag = cbstrTag;
      SysFreeString(cbstrTag);

      CComPtr<::IXMLDOMDocument> xmlDoc;
      xmlDoc.CoCreateInstance(__uuidof(::DOMDocument));
      if(xmlDoc==NULL)
      {
         printf("DOMDocument creation failed!\n");
         SysFreeString(bstrXml);
         return -1;
      }

      xmlDoc->put_async(VARIANT_FALSE);
      VARIANT_BOOL _result;

      xmlDoc->loadXML(bstrXml, &_result);
      ::SysFreeString(bstrXml);

      if(!_result)
      {
         cout << "loading XML data failed!" << endl;
         return -1;
      }

      ::IXMLDOMNodePtr xmlNode;

      BSTR tempXml;
      hr = xmlDoc->get_xml(&tempXml);
      if(FAILED (hr))
      {
         ::SysFreeString(tempXml);
         ::SysFreeString(bstrTag);
         cout << "Retrieving XML data failed!" << endl;
         return -1;
      }
      cout << "Here's the whole XML:" << endl;
      cout << W2A(tempXml) << endl;
      ::SysFreeString(tempXml);

      ::IXMLDOMNode *pNode;
      hr = xmlDoc->selectSingleNode(bstrTag, &pNode);
      if(FAILED (hr))
      {
         ::SysFreeString(tempXml);
         ::SysFreeString(bstrTag);
         cout << "Root node not found!" << endl;
         return -1;
      }

      hr = pNode->get_xml(&tempXml);
      if(FAILED (hr))
      {
         ::SysFreeString(tempXml);
         ::SysFreeString(bstrTag);
         cout << "Retrievin XML data failed!" << endl;
         return -1;
      }

      cout << "And here's the table:" << endl;
      cout << W2A(temp2Xml) << endl;
      ::SysFreeString(tempXml);
      ::SysFreeString(bstrTag);

   }
   else
   {
      printf("\nError: %s\n", W2A(xmlRequest->statusText));
   }

   return 0;
}

neau33 [29.05.2011 17:27:55]

#

Heippa taas!

tässä vielä samaa paskaa VC++.NET/WinForms ympäristössä...(No need for XML)

MSHTML_Sample:
Lomakkeelle (Form1):
GroupBox kontrolli (groupBox1)
WebBrowser kontrolli (webBrowser1) ->(groupBoxiin)
DataGrid kontrolli (dataGrid1)
sekä nappi (button1)
Referenssi Microsoft.mshtml.dll
löytyy seuraavista Office PIAs paketeista:
(Microsoft Office Primary Interop Assemblies)

Office XP PIAs
Office 2003 Update: Redistributable Primary Interop Assemblies
2007 Microsoft Office System Update: Redistributable Primary Interop Assemblies
Microsoft Office 2010: Primary Interop Assemblies Redistributable

//Form1.h
#pragma once

namespace MSHTML_Sample {

   using namespace mshtml;
   using namespace System;
   using namespace System::ComponentModel;
   using namespace System::Collections;
   using namespace System::Windows::Forms;
   using namespace System::Data;
   using namespace System::Drawing;

   /// <summary>
   /// Sample form: loads a page into a WebBrowser control, collects the
   /// page's table elements through MSHTML and, on a button click, copies
   /// their cell texts into DataTables; the first table is shown in the grid.
   /// </summary>
   public ref class Form1 : public System::Windows::Forms::Form
   {
   public:
      Form1(void)
      {
         InitializeComponent();
         //
         //TODO: Add the constructor code here
         //
      }

   protected:
      /// <summary>
      /// Clean up any resources being used.
      /// </summary>
      ~Form1()
      {
         if (components)
         {
            delete components;
         }
      }
   private: System::Windows::Forms::GroupBox^  groupBox1;
   protected:
   private: System::Windows::Forms::WebBrowser^  webBrowser1;

   // One DataTable per HTML table of the loaded page (filled by button1_Click).
   private: cli::array<System::Data::DataTable^>^ tableArray;
   // MSHTML document of the page shown in webBrowser1.
   private: mshtml::HTMLDocument^ pDoc;
   // The table elements of pDoc (set in webBrowser1_DocumentCompleted).
   private: mshtml::IHTMLElementCollection^ htmlTables;
   private: System::Windows::Forms::Button^  button1;
   private: System::Windows::Forms::DataGrid^  dataGrid1;


   private:
      /// <summary>
      /// Required designer variable.
      /// </summary>
      System::ComponentModel::Container ^components;


   //Windows Form Designer generated code...

   /// <summary>
   /// Hides the browser group box and disables the button until a page
   /// has been loaded.
   /// </summary>
   private: System::Void Form1_Load(
   System::Object^  sender, System::EventArgs^  e)
   {
      this->groupBox1->Visible = false;
      this->button1->Enabled = false;
   }

   /// <summary>
   /// Collects the table elements of the loaded document and enables
   /// the button.
   /// </summary>
   private: System::Void webBrowser1_DocumentCompleted(System::Object^  sender,
   System::Windows::Forms::WebBrowserDocumentCompletedEventArgs^  e)
   {
      pDoc=(HTMLDocument^)webBrowser1->Document->DomDocument;

      // Do not spin on readyState here: this handler runs on the UI thread,
      // and a busy loop would block the thread the browser control lives on.
      // If the document is not complete yet, return and let a later
      // DocumentCompleted event handle it.
      if(pDoc == nullptr || pDoc->readyState != "complete")
      {
         return;
      }

      htmlTables = pDoc->getElementsByTagName("table");
      this->button1->Enabled = true;
   }

   /// <summary>
   /// Starts loading the test page once the form is shown.
   /// </summary>
   private: System::Void Form1_Shown(
   System::Object^  sender, System::EventArgs^  e)
   {
      // test page
      this->webBrowser1->Url = gcnew Uri(
      "http://www.elisanet.fi/nea.fi/tablex/index.html");
   }

   /// <summary>
   /// Copies every HTML table whose first child is a TBODY into a DataTable
   /// (one row per TR, one String column per cell position) and binds the
   /// first DataTable to the grid.
   /// </summary>
   private: System::Void button1_Click(
   System::Object^  sender, System::EventArgs^  e)
   {
      // Nothing to show when the page has no tables; returning here also
      // keeps tableArray from being dereferenced while it is still nullptr.
      if(htmlTables == nullptr || htmlTables->length <= 0)
      {
         return;
      }

      int tables = htmlTables->length;
      tableArray = gcnew cli::array<System::Data::DataTable^>(tables);

      for(int i = 0; i < tables; ++i)
      {
         tableArray[i] = gcnew DataTable;

         IHTMLDOMNode^ tableNode;
         tableNode = (IHTMLDOMNode^)htmlTables->item(i, nullptr);
         if(tableNode == nullptr)
         {
            continue;
         }

         // A table without children has no firstChild.
         IHTMLDOMNode^ bodyNode;
         bodyNode = (IHTMLDOMNode^)tableNode->firstChild;
         if(bodyNode == nullptr || bodyNode->nodeName != "TBODY")
         {
            continue;
         }

         IHTMLDOMChildrenCollection^ trNodes;
         trNodes = (IHTMLDOMChildrenCollection^)bodyNode->childNodes;
         int rows = trNodes->length;

         for(int j = 0; j < rows; ++j)
         {
            IHTMLDOMNode^ tr;
            tr = (IHTMLDOMNode^)trNodes->item(j);
            if(tr->nodeName != "TR")
            {
               continue;
            }

            IHTMLDOMChildrenCollection^ tdNodes;
            tdNodes = (IHTMLDOMChildrenCollection^)tr->childNodes;
            int columns = tdNodes->length;

            // Make sure the DataTable has a column for every cell of this
            // row; a row wider than the first one would otherwise index
            // past the last column.
            while(tableArray[i]->Columns->Count < columns)
            {
               int colNumber = tableArray[i]->Columns->Count + 1;
               String^ colName = "Column";
               colName += colNumber.ToString();
               tableArray[i]->Columns->Add(
               colName, String::typeid);
            }

            DataRow^ drow;
            drow = tableArray[i]->NewRow();

            for(int k = 0; k < columns; ++k)
            {
               IHTMLDOMNode^ td;
               td = (IHTMLDOMNode^)tdNodes->item(k);

               // Only TD cells are copied; other child nodes leave their
               // column empty.
               if(td->nodeName == "TD")
               {
                  IHTMLElement^ elem;
                  elem = (IHTMLElement^)td;
                  drow->default[k]= elem->innerText;
               }
            }

            tableArray[i]->Rows->Add(drow);
         }
      }

      if(tableArray->Length > 0)
      {
         this->dataGrid1->DataSource = tableArray[0];
      }

   }
   };
}

-Nea-

neau33 [09.06.2011 08:43:34]

#

Heippa taas!

tässä vielä samaa paskaa SharpDevelop 4.0/VC++.NET/Console ympäristössä...
(No need for WebBrowser, no need for XML)

// VC++.NET MFC Console Application
//(väännetty SharpDevelop 4.0:lla)
using namespace mshtml;
using namespace System;
using namespace System::Threading;
using namespace System::Data;
using namespace Microsoft::VisualStudio::OLE::Interop;

// Loads a page through MSHTML (no WebBrowser control), copies every HTML
// table whose first child is a TBODY into a DataTable and prints the tables
// to the console. Returns 0 on success, -1 when the page has no tables.
int main(array<System::String ^> ^args)
{
   String^ url =
   "http://www.elisanet.fi/nea.fi/tablex/index.html";

   HTMLDocument^ objMSHTML;
   IHTMLDocument2^ objDocument;
   IPersistStreamInit^ ips;
   objMSHTML = gcnew HTMLDocument;
   ips = (IPersistStreamInit^)objMSHTML;
   ips->InitNew();
   objDocument = objMSHTML->createDocumentFromUrl(url, "");

   // Poll until the document reports that loading has finished.
   while(objDocument->readyState != "complete")
   {
      Thread::Sleep(100);
   }

   objMSHTML = nullptr;
   HTMLDocument^ theDoc = (HTMLDocument^)objDocument;
   IHTMLElementCollection^ htmlTables;
   htmlTables = theDoc->getElementsByTagName("table");

   int tables = htmlTables->length;

   if(tables==0)
   {
      Console::Write("Requested page has no table elements!");
      return -1;
   }

   cli::array<DataTable^>^ TableArray =
   gcnew cli::array<DataTable^>(tables);


   // Store the data of the HTML tables in the DataTables.
   for(int i=0;i<tables;++i)
   {
      TableArray[i] = gcnew DataTable();

      IHTMLDOMNode^ tableNode = nullptr;
      tableNode =(IHTMLDOMNode^)htmlTables->item(i, nullptr);
      if(tableNode == nullptr)
      {
         continue;
      }

      // A table without children has no firstChild.
      IHTMLDOMNode^ bodyNode = nullptr;
      bodyNode = (IHTMLDOMNode^)tableNode->firstChild;
      if(bodyNode == nullptr)
      {
         continue;
      }

      String^ bodyTag = bodyNode->nodeName;
      if(bodyTag->ToUpper() != "TBODY")
      {
         continue;
      }

      IHTMLDOMChildrenCollection^ trNodes = nullptr;
      trNodes = (IHTMLDOMChildrenCollection^)bodyNode->childNodes;

      // An empty table simply produces an empty DataTable. Leaving the loop
      // here (break) would skip the remaining tables and leave their array
      // slots null for the print loop below.
      int rows = trNodes->length;

      for(int j=0;j<rows;++j)
      {
         IHTMLDOMNode^ tr = nullptr;
         tr = (IHTMLDOMNode^)trNodes->item(j);
         String^ trTag = tr->nodeName;

         if(trTag->ToUpper() != "TR")
         {
            continue;
         }

         IHTMLDOMChildrenCollection^ tdNodes = nullptr;
         tdNodes = (IHTMLDOMChildrenCollection^)tr->childNodes;

         int cols = tdNodes->length;

         // Skip an empty row, but keep processing the rows after it.
         if(cols == 0)
         {
            continue;
         }

         // Make sure the DataTable has a column for every cell of this row;
         // a row wider than the first one would otherwise index past the
         // last column.
         while(TableArray[i]->Columns->Count < cols)
         {
            int colNumber = TableArray[i]->Columns->Count + 1;
            String^ colName = "Column";
            colName += colNumber.ToString();
            TableArray[i]->Columns->Add(colName, String::typeid);
         }

         DataRow^ drow = nullptr;
         drow = TableArray[i]->NewRow();

         for(int k=0;k<cols;++k)
         {
            IHTMLDOMNode^ td = nullptr;
            td =(IHTMLDOMNode^)tdNodes->item(k);
            String^ tdTag = td->nodeName;

            // Only TD cells are copied; other child nodes leave their
            // column empty.
            if(tdTag->ToUpper() == "TD")
            {
               IHTMLElement^ cell = nullptr;
               cell =(IHTMLElement^)td;
               drow->default[k]= cell->innerText;
            }
         }

         TableArray[i]->Rows->Add(drow);
      }
   }

   // Print the data of the DataTables.
   Console::Clear();
   Console::Write(Environment::NewLine);

   for(int t=0;t<tables;++t)
   {
      int tblNumber = t + 1;
      String^ header = "Table ";
      header += tblNumber.ToString();
      header += "/";
      header += tables.ToString();
      Console::Write(header + "\n");

      int rowCount = TableArray[t]->Rows->Count;
      int colCount = TableArray[t]->Columns->Count;

      for(int j=0;j<rowCount;++j)
      {
         for(int k=0;k<colCount;++k)
         {
            Console::Write(TableArray[t]->Rows[j][k]);

            // Separator between cells only, not after the last one.
            if(k < colCount - 1)
            {
               Console::Write(" | ");
            }
         }

         Console::Write(Environment::NewLine);
      }

      Console::Write(Environment::NewLine);
   }

   Console::Write("Press key Enter to exit...\n");
   Console::Read();

   return 0;
}

tesmu [10.06.2011 20:48:15]

#

Coding war is dangerous thing!


Sivun alkuun

Vastaus

Aihe on jo aika vanha, joten et voi enää vastata siihen.

Tietoa sivustosta