Tarkoituksena olisi käyttää c++:lla nettisivulla olevaa taulukkoa.
Siinä on kilpailun osallistuja ja arvo, eli ihan vaan 2d taulukko.
Saan sen leikepöydälle (Firefox ja lisäosa) manuaalisesti, mutta
kuinka saisin sen tehtyä ohjelmallisesti?
Hei!
Sinun tulisi lukea tuo kyseinen nettisivusi käyttäen jotain valmista kirjastoa (esim. SDL_net tai SFML) ja sitten parsia kyseisen taulukon tiedot sivulta c++ taulukkoon.
Moi E.T.
Mikäli käyttis on Windows, niin käytä WinInet API:a (wininet.h); netti on ohjeita pullollaan.
libcurl voisi olla varteenotettava porttautuva vaihtoehto. Tähän (C-kirjastoon) näyttäisi löytyvän myös joku C++-wrapperi, jota kannattanee myös kokeilla. HTTP:tä alemmilla protokollatasoilla puljaamista kannattaa välttää. Taulukon saat ongittua HTML:stä kivuttomimmin jonkinlaisen XML-parserin (esim. http://xerces.apache.org/xerces-c/) avulla.
(Mod. edit: Jottei tyylikäs vastaus hukkuisi neau33:n koodien sekaan, tässä on siihen linkki.)
Moi taas!
tässä olisi viritelmä, jolla poimia web-sivulta <table></table> tagit sisältöineen ja tallennella .xml tiedostoksi, mutta, mutta...
// ReadTableFromPage.cpp : Defines the entry point for the console application. // #include "stdafx.h" #include <windows.h> #include <wininet.h> #pragma comment(lib,"wininet.lib") #include <iostream> #include <fstream> #include <string> #import <msxml6.dll> named_guids rename_namespace("MyMSXML") #pragma comment(lib,"msxml6.lib") using namespace std; using namespace MyMSXML; int main(int argc, char* argv[]) { try { remove("c:/testi.xml"); } catch(...){} char Buf[1024]; DWORD ReadSize; BOOL bResult; string str; HINTERNET hInternet = InternetOpenA( "UsewrAgent/1.0",INTERNET_OPEN_TYPE_PRECONFIG, NULL,NULL,0); if(!hInternet) { printf("hInternet Failed!\n"); return -1; } HINTERNET hConnection = InternetConnectA( hInternet, "http://www.elisanet.fi/nea.fi/tabletest/", INTERNET_DEFAULT_HTTPS_PORT, "","", INTERNET_SERVICE_HTTP,0,0); if (!hConnection) { InternetCloseHandle(hInternet); printf("InternetConnectA failed!\n"); return -1; } HINTERNET hFile = InternetOpenUrl( hInternet, "http://www.elisanet.fi/nea.fi/tabletest/", NULL,0,INTERNET_FLAG_RELOAD,0); if(!hFile) { InternetCloseHandle(hConnection); InternetCloseHandle(hInternet); printf("InternetOpenUrl Failed!\n"); return -1; } for(;;) { ReadSize = 1024; bResult = InternetReadFile( hFile,Buf,1024,&ReadSize); if(bResult && (ReadSize == 0)) break; Buf[ReadSize] = '\0'; str+=("%s", Buf); } InternetCloseHandle(hFile); InternetCloseHandle(hConnection); InternetCloseHandle(hInternet); size_t ltag; size_t utag; ltag = str.find("</table>"); utag = str.find("</TABLE>"); string stag; string etag; if(int(ltag) > -1) { stag = "<table"; etag = "/table"; } else if(int(ltag) < 0 && int(utag) > -1) { stag = "<TABLE"; etag = "/TABLE"; } else if(int(ltag) == -1 && int(utag) == -1) { cout << "Page Requested Has No HTML Table!\n"; return -1; } size_t spos; spos = str.find(stag); size_t epos; epos = str.find(etag); spos -= 1; epos += 7; string xmlstr; xmlstr = "<?xml version=\"1.0 encoding=\"ISO-8859-1\" ?>\n"; xmlstr += str.substr(spos, 
epos-spos); cout << xmlstr << endl; ofstream xmlfile; xmlfile.open ("c:/testi.xml"); xmlfile << xmlstr; xmlfile.close(); IXMLDOMDocument *pDoc=NULL; IXMLDOMNodeList* pDomNodeList=NULL; //try //{ //*** jutska kääntyy hienosti, //(Microsoft Visual C++ 6.0) //mutta tässä tökkää kun ohjelmaa ajetaan //(tökkää myös kvalidilla .XML -tiedostolla) pDoc->load("c:/test.xml"); //*** elikäs olisko jollain jotain ratkaisua? //} //catch(...){} BSTR bstr = ::SysAllocString(L"<tr>"); BSTR bstr2 = ::SysAllocString(L"<td>"); IXMLDOMNodePtr nodeptr; return 0; }
TinyXML:llä onnistunee varmasti myös tuo parsiminen melko helposti.
Heippa taas E.T.!
elikä siis näin lukisit taulukon nettisivulta ja pukkaisit XML Documenttiin Microsoft Visual C++ 6.0/WinInet API/MSXML6 ympäristössä...
// ReadTableFromPage.cpp : Defines the entry point for the console application. // #define UNICODE #include "stdafx.h" #include <iostream> #include <fstream> #include <string> #include <atlbase.h> #include <atlconv.h> #include <comdef.h> #include <windows.h> #include <wininet.h> #pragma comment(lib,"wininet.lib") #import <msxml6.dll> named_guids #pragma comment(lib,"msxml6.lib") using namespace std; using namespace MSXML2; int main(int argc, char* argv[]) { USES_CONVERSION; char Buf[1024]; DWORD ReadSize; BOOL bResult; string str; HINTERNET hInternet = InternetOpenA( "UsewrAgent/1.0",INTERNET_OPEN_TYPE_PRECONFIG, NULL,NULL,0); if(!hInternet) { printf("hInternet Failed!\n"); return -1; } HINTERNET hConnection = InternetConnectA( hInternet, "http://www.elisanet.fi/nea.fi/tablex", INTERNET_DEFAULT_HTTPS_PORT, "","", INTERNET_SERVICE_HTTP,0,0); if (!hConnection) { InternetCloseHandle(hInternet); printf("InternetConnectA failed!\n"); return -1; } HINTERNET hFile = InternetOpenUrl( hInternet, "http://www.elisanet.fi/nea.fi/tablex", NULL,0,INTERNET_FLAG_RELOAD,0); if(!hFile) { InternetCloseHandle(hConnection); InternetCloseHandle(hInternet); printf("InternetOpenUrl Failed!\n"); return -1; } for(;;) { ReadSize = 1024; bResult = InternetReadFile( hFile,Buf,1024,&ReadSize); if(bResult && (ReadSize == 0)) break; Buf[ReadSize] = '\0'; str+=("%s", Buf); } InternetCloseHandle(hFile); InternetCloseHandle(hConnection); InternetCloseHandle(hInternet); size_t ltag; size_t utag; ltag = str.find("</table>"); utag = str.find("</TABLE>"); string stag; string etag; if(int(ltag) > -1) { stag = "<table"; etag = "/table"; } else if(int(ltag) < 0 && int(utag) > -1) { stag = "<TABLE"; etag = "/TABLE"; } else if(int(ltag) == -1 && int(utag) == -1) { printf("Requested page has no table element!\n"); return -1; } size_t spos; spos = str.find(stag); size_t epos; epos = str.find(etag); spos -= 1; epos += 7; string xmlstr; xmlstr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; xmlstr += 
str.substr(spos, epos-spos); CComBSTR cbstrData(xmlstr.c_str()); BSTR bstrXml = cbstrData.Copy(); SysFreeString(cbstrData); xmlstr=""; HRESULT hr; hr = CoInitialize(NULL); if(FAILED (hr)) { printf("HRESULT Initialization failed!\n"); SysFreeString(bstrXml); return -1; } IXMLDOMDocumentPtr xmlDoc = NULL; xmlDoc.CreateInstance(__uuidof(DOMDocument)); if(xmlDoc==NULL) { printf("DOMDocument creation failed!\n"); SysFreeString(bstrXml); return -1; } xmlDoc->put_async(VARIANT_TRUE); xmlDoc->put_preserveWhiteSpace(VARIANT_TRUE); xmlDoc->put_validateOnParse(VARIANT_FALSE); VARIANT_BOOL _result; hr = xmlDoc->raw_loadXML(bstrXml, &_result); if(FAILED (hr)) { printf("Loading .xml data failed!\n"); SysFreeString(bstrXml); return -1; } _bstr_t xmldata; xmldata = xmlDoc->Getxml(); string chkstr = OLE2CA(xmldata); if(chkstr != "") { SysFreeString(bstrXml); printf(OLE2CA(xmldata)); printf("\n\nXML data successfully loaded to IXMLDocument object!\n"); SysFreeString(xmldata); } else { SysFreeString(xmldata); printf(OLE2CA(bstrXml)); printf("\n\nXML data still not loaded into XMLDocument!\n"); SysFreeString(bstrXml); } return 0; }
Heippa taas E.T.!
elikä siis näin lukisit taulukon nettisivulta ja pukkaisit XML Documenttiin Microsoft Visual C++ 6.0/WinInet API/MSXML6 ympäristössä...
// ReadTableFromPage.cpp : Defines the entry point for the console application. // #define UNICODE #include "stdafx.h" #include <iostream> #include <fstream> #include <string> #include <atlbase.h> #include <atlconv.h> #include <comdef.h> #include <windows.h> #include <wininet.h> #pragma comment(lib,"wininet.lib") #import <msxml6.dll> named_guids #pragma comment(lib,"msxml6.lib") using namespace std; using namespace MSXML2; int main(int argc, char* argv[]) { USES_CONVERSION; char Buf[1024]; DWORD ReadSize; BOOL bResult; string str; HINTERNET hInternet = InternetOpenA( "UsewrAgent/1.0",INTERNET_OPEN_TYPE_PRECONFIG, NULL,NULL,0); if(!hInternet) { printf("hInternet Failed!\n"); return -1; } HINTERNET hConnection = InternetConnectA( hInternet, "http://www.elisanet.fi/nea.fi/tablex", INTERNET_DEFAULT_HTTPS_PORT, "","", INTERNET_SERVICE_HTTP,0,0); if (!hConnection) { InternetCloseHandle(hInternet); printf("InternetConnectA failed!\n"); return -1; } HINTERNET hFile = InternetOpenUrl( hInternet, "http://www.elisanet.fi/nea.fi/tablex", NULL,0,INTERNET_FLAG_RELOAD,0); if(!hFile) { InternetCloseHandle(hConnection); InternetCloseHandle(hInternet); printf("InternetOpenUrl Failed!\n"); return -1; } for(;;) { ReadSize = 1024; bResult = InternetReadFile( hFile,Buf,1024,&ReadSize); if(bResult && (ReadSize == 0)) break; Buf[ReadSize] = '\0'; str+=("%s", Buf); } InternetCloseHandle(hFile); InternetCloseHandle(hConnection); InternetCloseHandle(hInternet); size_t ltag; size_t utag; ltag = str.find("</table>"); utag = str.find("</TABLE>"); string stag; string etag; if(int(ltag) > -1) { stag = "<table"; etag = "/table"; } else if(int(ltag) < 0 && int(utag) > -1) { stag = "<TABLE"; etag = "/TABLE"; } else if(int(ltag) == -1 && int(utag) == -1) { printf("Requested page has no table element!\n"); return -1; } size_t spos; spos = str.find(stag); size_t epos; epos = str.find(etag); spos -= 1; epos += 7; string xmlstr; xmlstr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; xmlstr += 
str.substr(spos, epos-spos); CComBSTR cbstrData(xmlstr.c_str()); BSTR bstrXml = cbstrData.Copy(); SysFreeString(cbstrData); xmlstr=""; HRESULT hr; hr = CoInitialize(NULL); if(FAILED (hr)) { printf("HRESULT Initialization failed!\n"); SysFreeString(bstrXml); return -1; } IXMLDOMDocumentPtr xmlDoc = NULL; xmlDoc.CreateInstance(__uuidof(DOMDocument)); if(xmlDoc==NULL) { printf("DOMDocument creation failed!\n"); SysFreeString(bstrXml); return -1; } xmlDoc->put_async(VARIANT_TRUE); xmlDoc->put_preserveWhiteSpace(VARIANT_TRUE); xmlDoc->put_validateOnParse(VARIANT_FALSE); VARIANT_BOOL _result; hr = xmlDoc->raw_loadXML(bstrXml, &_result); if(FAILED (hr)) { printf("Loading .xml data failed!\n"); SysFreeString(bstrXml); return -1; } _bstr_t xmldata; xmldata = xmlDoc->Getxml(); string chkstr = OLE2CA(xmldata); if(chkstr != "") { SysFreeString(bstrXml); printf(OLE2CA(xmldata)); printf("\n\nXML data successfully loaded to IXMLDocument object!\n"); SysFreeString(xmldata); } else { SysFreeString(xmldata); printf(OLE2CA(bstrXml)); printf("\n\nXML data still not loaded into XMLDocument!\n"); SysFreeString(bstrXml); } return 0; }
Heippa taas!
tässä vielä sama jutska ilman WinInet API'a...
// XMLHTTPRequestSample.cpp : Defines the entry point for the console application. // #define UNICODE #include "stdafx.h" #include <iostream> #include <string> #include <atlbase.h> #include <windows.h> #import <msxml6.dll> named_guids #pragma comment(lib,"msxml6.lib") using namespace std; using namespace MSXML2; int main(int argc, char* argv[]) { USES_CONVERSION; HRESULT hr; hr = CoInitialize(NULL); if(FAILED (hr)) { printf("HRESULT Initialization failed!\n"); return -1; } bstr_t sUrl = "http://www.elisanet.fi/nea.fi/tablex/index.html"; IXMLHTTPRequestPtr xmlRequest = NULL; xmlRequest.CreateInstance(__uuidof(XMLHTTP60)); if(xmlRequest==NULL) { printf("XMLHTTPRequest object creation failed!\n"); return -1; } hr = xmlRequest->open(_bstr_t(_T("POST")), sUrl, VARIANT_FALSE); f(FAILED (hr)) { cout << "XMLRequest open failed!" << endl; return -1; } xmlRequest->setRequestHeader((bstr_t)"Content-Type", (bstr_t)"text/xml"); xmlRequest->send(); if(200 == xmlRequest->status) { string rawstr = W2A(xmlRequest->responseText); size_t ltag; size_t utag; ltag = rawstr.find("</table>"); utag = rawstr.find("</TABLE>"); string stag; string etag; if(int(ltag) > -1) { stag = "<table"; etag = "/table"; } else if(int(ltag) < 0 && int(utag) > -1) { stag = "<TABLE"; etag = "/TABLE"; } else if(int(ltag) == -1 && int(utag) == -1) { printf("Requested page has no table element!\n"); return -1; } string rTag = stag.erase(0,1); size_t spos; spos = rawstr.find(stag); size_t epos; epos = rawstr.find(etag); spos -= 1; epos += 7; string xmlstr; xmlstr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; xmlstr += rawstr.substr(spos, epos-spos); CComBSTR cbstrData(xmlstr.c_str()); BSTR bstrXml = cbstrData.Copy(); SysFreeString(cbstrData); rawstr=""; xmlstr=""; CComBSTR cbstrTag(rTag.c_str()); BSTR bstrTag = cbstrTag; SysFreeString(cbstrTag); IXMLDOMDocumentPtr xmlDoc = NULL; xmlDoc.CreateInstance(__uuidof(DOMDocument)); if(xmlDoc==NULL) { printf("DOMDocument creation failed!\n"); SysFreeString(bstrXml); 
return -1; } xmlDoc->put_async(VARIANT_TRUE); xmlDoc->put_preserveWhiteSpace(VARIANT_TRUE); xmlDoc->put_validateOnParse(VARIANT_FALSE); VARIANT_BOOL _result; xmlDoc->raw_loadXML(bstrXml, &_result); ::SysFreeString(bstrXml); if(!_result) { cout << "Loading XML data failed!" << endl; return -1; } IXMLDOMNodePtr xmlNode; cout << "Here's the whole XML:" << endl; cout << W2A(xmlDoc->xml) << endl; cout << "And here's the table:" << endl; xmlNode = xmlDoc->selectSingleNode(bstrTag); cout << W2A(xmlNode->xml) << endl; } else { printf(_T("\nError: %s\n"), W2A(xmlRequest->statusText)); } return 0; }
... ja tässä kokonaan ilman Windowsia :), käyttäen cURLpp- ja TinyXML-kirjastoja. Ikävien C-(tyylisten) rajapintojen käsittelyltä voi monesti välttyä käyttämällä muiden tekemiä valmiita wrappereita.
#include <curlpp/cURLpp.hpp>
#include <curlpp/Exception.hpp>
#include <curlpp/Options.hpp>
#include <tinyxml.h>
#include <iostream>
#include <sstream>

// Downloads an X(HT)ML page with cURLpp and parses it with TinyXML.
// Returns 0 on success and 1 when the download or the parse fails.
int main()
{
    try
    {
        // Initialise cURLpp (RAII)
        curlpp::Cleanup curlppHandle;

        // Read the X(HT)ML data into a buffer
        std::stringstream bufstream;
        bufstream << curlpp::options::Url("http://osoite");

        // Parse the XML data
        TiXmlDocument doc;
        bufstream >> doc;
        if (doc.Error())
        {
            // Report malformed input instead of continuing with a bad tree.
            std::cerr << doc.ErrorDesc() << std::endl;
            return 1;
        }

        // ... fetch the required data from the XML data structure "doc"
    }
    catch (const curlpp::RuntimeError& e)
    {
        // cURL's error messages
        std::cerr << e.what() << std::endl;
        return 1;
    }
    catch (const curlpp::LogicError& e)
    {
        // Incorrect use of the cURLpp API
        std::cerr << e.what() << std::endl;
        return 1;
    }
    return 0;
}
Työläin osa on taulukon parsiminen haluamaasi muotoon puumaisesta XML-tietorakenteesta. Tämä operaatio puuttuu sekä tästä että neau33:n koodista.
EDIT: neau33, koodisi ei muuten myöskään löydä koko taulukko-osaa, jos sen sisällä on toisia taulukoita.
Heippa taas!
No oli miten oli, mutta tässä vielä samaa paskaa VC++.NET ympäristössä...
// XMLHTTPRequestSample_VC++NET.cpp : Defines the entry point for the console application. // #include "stdafx.h" #include <iostream> #include <atlbase.h> #include <windows.h> #import <msxml6.dll> named_guids #pragma comment(lib,"msxml6.lib") #pragma warning( disable: 4018 4786) #include <string> #pragma warning( pop ) using namespace std; using namespace MSXML2; int _tmain(int argc, _TCHAR* argv[]) { USES_CONVERSION; HRESULT hr; hr = CoInitialize(NULL); if(FAILED (hr)) { printf("HRESULT Initialization failed!\n"); return -1; } bstr_t sUrl = "http://www.elisanet.fi/nea.fi/tablex/index.html"; IXMLHTTPRequestPtr xmlRequest = NULL; xmlRequest.CreateInstance(__uuidof(XMLHTTP60)); if(xmlRequest==NULL) { printf("XMLHTTPRequest object creation failed!\n"); return -1; } hr = xmlRequest->open(_bstr_t(_T("POST")), sUrl, VARIANT_FALSE); if(FAILED (hr)) { cout << "XMLRequest open failed!" << endl; return -1; } xmlRequest->setRequestHeader((bstr_t)"Content-Type", (bstr_t)"text/xml"); xmlRequest->send(); if(200 == xmlRequest->status) { string rawstr = W2A(xmlRequest->responseText); size_t ltag; size_t utag; ltag = rawstr.find("</table>"); utag = rawstr.find("</TABLE>"); string stag; string etag; if(int(ltag) > -1) { stag = "<table"; etag = "/table"; } else if(int(ltag) < 0 && int(utag) > -1) { stag = "<TABLE"; etag = "/TABLE"; } else if(int(ltag) == -1 && int(utag) == -1) { printf("Requested page has no table element!\n"); return -1; } string rTag = stag.erase(0,1); size_t spos; spos = rawstr.find(stag); size_t epos; epos = rawstr.find(etag); spos -= 1; epos += 7; string xmlstr; xmlstr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"; xmlstr += rawstr.substr(spos, epos-spos); CComBSTR cbstrData(xmlstr.c_str()); BSTR bstrXml = cbstrData.Copy(); SysFreeString(cbstrData); rawstr=""; xmlstr=""; CComBSTR cbstrTag(rTag.c_str()); BSTR bstrTag = cbstrTag; SysFreeString(cbstrTag); CComPtr<::IXMLDOMDocument> xmlDoc; xmlDoc.CoCreateInstance(__uuidof(::DOMDocument)); if(xmlDoc==NULL) { 
printf("DOMDocument creation failed!\n"); SysFreeString(bstrXml); return -1; } xmlDoc->put_async(VARIANT_FALSE); VARIANT_BOOL _result; xmlDoc->loadXML(bstrXml, &_result); ::SysFreeString(bstrXml); if(!_result) { cout << "loading XML data failed!" << endl; return -1; } ::IXMLDOMNodePtr xmlNode; BSTR tempXml; hr = xmlDoc->get_xml(&tempXml); if(FAILED (hr)) { ::SysFreeString(tempXml); ::SysFreeString(bstrTag); cout << "Retrieving XML data failed!" << endl; return -1; } cout << "Here's the whole XML:" << endl; cout << W2A(tempXml) << endl; ::SysFreeString(tempXml); ::IXMLDOMNode *pNode; hr = xmlDoc->selectSingleNode(bstrTag, &pNode); if(FAILED (hr)) { ::SysFreeString(tempXml); ::SysFreeString(bstrTag); cout << "Root node not found!" << endl; return -1; } hr = pNode->get_xml(&tempXml); if(FAILED (hr)) { ::SysFreeString(tempXml); ::SysFreeString(bstrTag); cout << "Retrievin XML data failed!" << endl; return -1; } cout << "And here's the table:" << endl; cout << W2A(temp2Xml) << endl; ::SysFreeString(tempXml); ::SysFreeString(bstrTag); } else { printf("\nError: %s\n", W2A(xmlRequest->statusText)); } return 0; }
Heippa taas!
tässä vielä samaa paskaa VC++.NET/WinForms ympäristössä...(No need for XML)
MSHTML_Sample:
Lomakkeelle (Form1):
GroupBox kontrolli (groupBox1)
WebBrowser kontrolli (webBrowser1) ->(groupBoxiin)
DataGrid kontrolli (dataGrid1)
sekä nappi (button1)
Referenssi Microsoft.mshtml.dll
löytyy seuraavista Office PIAs paketeista:
(Microsoft Office Primary Interop Assemblies)
Office XP PIAs
Office 2003 Update: Redistributable Primary Interop Assemblies
2007 Microsoft Office System Update: Redistributable Primary Interop Assemblies
Microsoft Office 2010: Primary Interop Assemblies Redistributable
//Form1.h #pragma once namespace MSHTML_Sample { using namespace mshtml; using namespace System; using namespace System::ComponentModel; using namespace System::Collections; using namespace System::Windows::Forms; using namespace System::Data; using namespace System::Drawing; /// <summary> /// Summary for Form1 /// </summary> public ref class Form1 : public System::Windows::Forms::Form { public: Form1(void) { InitializeComponent(); // //TODO: Add the constructor code here // } protected: /// <summary> /// Clean up any resources being used. /// </summary> ~Form1() { if (components) { delete components; } } private: System::Windows::Forms::GroupBox^ groupBox1; protected: private: System::Windows::Forms::WebBrowser^ webBrowser1; private: cli::array<System::Data::DataTable^>^ tableArray; private: mshtml::HTMLDocument^ pDoc; private: mshtml::IHTMLElementCollection^ htmlTables; private: System::Windows::Forms::Button^ button1; private: System::Windows::Forms::DataGrid^ dataGrid1; private: /// <summary> /// Required designer variable. /// </summary> System::ComponentModel::Container ^components; //Windows Form Designer generated code... private: System::Void Form1_Load( System::Object^ sender, System::EventArgs^ e) { this->groupBox1->Visible = false; this->button1->Enabled = false; } private: System::Void webBrowser1_DocumentCompleted(System::Object^ sender, System::Windows::Forms::WebBrowserDocumentCompletedEventArgs^ e) { pDoc=(HTMLDocument^)webBrowser1->Document->DomDocument; while(pDoc->readyState != "complete"){} htmlTables = pDoc->getElementsByTagName("table"); this->button1->Enabled = true; } private: System::Void Form1_Shown( System::Object^ sender, System::EventArgs^ e) { //testi... 
this->webBrowser1->Url = gcnew Uri( "http://www.elisanet.fi/nea.fi/tablex/index.html"); } private: System::Void button1_Click( System::Object^ sender, System::EventArgs^ e) { if(htmlTables->length > 0) { int tables = htmlTables->length; tableArray = gcnew cli::array<System::Data::DataTable^>(tables); for(int i = 0; i < tables; ++i) { tableArray[i] = gcnew DataTable; IHTMLDOMNode^ tableNode; tableNode = (IHTMLDOMNode^)htmlTables->item(i, nullptr); IHTMLDOMNode^ bodyNode; bodyNode = (IHTMLDOMNode^)tableNode->firstChild; if(bodyNode->nodeName == "TBODY") { IHTMLDOMChildrenCollection^ trNodes; trNodes = (IHTMLDOMChildrenCollection^)bodyNode->childNodes; int rows = trNodes->length; if(rows < 0) {break;} for(int j=0;j<rows;++j) { IHTMLDOMNode^ tr; tr = (IHTMLDOMNode^)trNodes->item(j); if(tr->nodeName == "TR") { IHTMLDOMChildrenCollection^ tdNodes; tdNodes = (IHTMLDOMChildrenCollection^)tr->childNodes; int columns = tdNodes->length; if(columns < 0){break;} DataRow^ drow; drow = tableArray[i]->NewRow(); for(int k = 0;k < columns; ++k) { if(tableArray[i]->Columns->Count == 0) { for(int l=0;l<columns;++l) { int^ colindex; colindex = l + 1; String^ strInt; strInt = colindex->ToString(); String^ colName = "Column"; colName += strInt; tableArray[i]->Columns->Add( colName, String::typeid); } } IHTMLDOMNode^ td; td = (IHTMLDOMNode^)tdNodes->item(k); if(td->nodeName == "TD") { IHTMLElement^ elem; elem = (IHTMLElement^)td; drow->default[k]= elem->innerText; } } tableArray[i]->Rows->Add(drow); } } } } } if(tableArray->Length > 0) { this->dataGrid1->DataSource = tableArray[0]; } } }; }
-Nea-
Heippa taas!
tässä vielä samaa paskaa SharpDevelop 4.0/VC++.NET/Console ympäristössä...
(No need for WebBrowser, no need for XML)
// VC++.NET MFC Console Application //(väännetty SharpDevelop 4.0:lla) using namespace mshtml; using namespace System; using namespace System::Threading; using namespace System::Data; using namespace Microsoft::VisualStudio::OLE::Interop; int main(array<System::String ^> ^args) { String^ url = "http://www.elisanet.fi/nea.fi/tablex/index.html"; HTMLDocument^ objMSHTML; IHTMLDocument2^ objDocument; IPersistStreamInit^ ips; objMSHTML = gcnew HTMLDocument; ips = (IPersistStreamInit^)objMSHTML; ips->InitNew(); objDocument = objMSHTML->createDocumentFromUrl(url, ""); while(objDocument->readyState != "complete") { Thread::Sleep(100); } objMSHTML = nullptr; HTMLDocument^ theDoc = (HTMLDocument^)objDocument; IHTMLElementCollection^ htmlTables; htmlTables = theDoc->getElementsByTagName("table"); int tables = htmlTables->length; if(tables==0) { Console::Write("Requested page has no table elements!"); return -1; } cli::array<DataTable^>^ TableArray = gcnew cli::array<DataTable^>(tables); //tallennetaan HTML taulukoiden data datataulukoihin for(int i=0;i<tables;++i) { TableArray[i] = gcnew DataTable(); IHTMLDOMNode^ tableNode = nullptr; tableNode =(IHTMLDOMNode^)htmlTables->item(i, nullptr); IHTMLDOMNode^ bodyNode = nullptr; bodyNode = (IHTMLDOMNode^)tableNode->firstChild; String^ bodyTag = bodyNode->nodeName; if(bodyTag->ToUpper() == "TBODY") { IHTMLDOMChildrenCollection^ trNodes = nullptr; trNodes = (IHTMLDOMChildrenCollection^)bodyNode->childNodes; int rows = trNodes->length; if(rows == 0){break;} for(int j=0;j<rows;++j) { IHTMLDOMNode^ tr = nullptr; tr = (IHTMLDOMNode^)trNodes->item(j); String^ trTag = tr->nodeName; if(trTag->ToUpper() == "TR") { IHTMLDOMChildrenCollection^ tdNodes = nullptr; tdNodes = (IHTMLDOMChildrenCollection^)tr->childNodes; int cols = tdNodes->length; if(cols == 0){break;} DataRow^ drow = nullptr; drow = TableArray[i]->NewRow(); for(int k=0;k<cols;++k) { if(TableArray[i]->Columns->Count == 0) { for(int l=0;l<cols;++l) { int^ colindex = l + 1; String^ 
strInt = colindex->ToString(); String^ colName = "Column"; colName += strInt; TableArray[i]->Columns->Add(colName, String::typeid); } } IHTMLDOMNode^ td = nullptr; td =(IHTMLDOMNode^)tdNodes->item(k); String^ tdTag = td->nodeName; if(tdTag->ToUpper() == "TD") { IHTMLElement^ cell = nullptr; cell =(IHTMLElement^)td; drow->default[k]= cell->innerText; } } TableArray[i]->Rows->Add(drow); } } } } //tulostetaan datataulukoiden data if(TableArray->Length > 0) { int tables = TableArray->Length; int^ tblcount = tables; Console::Clear(); Console::Write(Environment::NewLine); for(int i=0;i<tables;++i) { int^ tblindex = i + 1; String^ header = "Table "; header += tblindex->ToString(); header += "/" + tblcount->ToString(); Console::Write(header + "\n"); int rows = TableArray[i]->Rows->Count; int cols = TableArray[i]->Columns->Count; for(int j=0;j<rows;++j) { for(int k=0;k<cols;++k) { Console::Write(TableArray[i]->Rows[j][k]); if(k<cols) { Console::Write(" | "); } } Console::Write(Environment::NewLine); } Console::Write(Environment::NewLine); } } Console::Write("Press key Enter to exit...\n"); Console::Read(); return 0; }
A coding war is a dangerous thing!
Aihe on jo aika vanha, joten et voi enää vastata siihen.