//: C04:HTMLStripper2.cpp {RunByHand}
// From "Thinking in C++, Volume 2", by Bruce Eckel & Chuck Allison.
// (c) 1995-2004 MindView, Inc. All Rights Reserved.
// See source code use permissions stated in the file 'License.txt',
// distributed with the code package available at www.MindView.net.
//{L} ../C03/ReplaceAll
// Filter to remove html tags and markers.
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include "../C03/ReplaceAll.h"
#include "../require.h"
using namespace std;
string& stripHTMLTags(string& s) throw(runtime_error) {
size_t leftPos;
while((leftPos = s.find('<')) != string::npos) {
size_t rightPos = s.find('>', leftPos+1);
if(rightPos == string::npos) {
ostringstream msg;
msg << "Incomplete HTML tag starting in position "
<< leftPos;
throw runtime_error(msg.str());
}
s.erase(leftPos, rightPos - leftPos + 1);
}
// Remove all special HTML characters
replaceAll(s, "<", "<");
replaceAll(s, ">", ">");
replaceAll(s, "&", "&");
replaceAll(s, " ", " ");
// Etc...
return s;
}
// Program entry point: reads the file named by the command-line
// argument, strips its HTML markup with stripHTMLTags(), and prints the
// stripped text on standard output.
//
// Returns EXIT_SUCCESS when the text was stripped and printed. If
// stripHTMLTags() throws std::runtime_error, the message is written to
// standard error (so it cannot be mistaken for filtered output) and
// EXIT_FAILURE is returned.
//
// requireArgs() and assure() come from ../require.h; presumably they
// check the argument count and that the stream opened -- confirm there.
int main(int argc, char* argv[]) {
  requireArgs(argc, 1,
    "usage: HTMLStripper2 InputFile");
  std::ifstream in(argv[1]);
  assure(in, argv[1]);
  // Read entire file into string; then strip
  std::ostringstream ss;
  ss << in.rdbuf();
  try {
    std::string s = ss.str();
    // stripHTMLTags() finishes (or throws) before anything is written,
    // so a failure never leaves partial output on stdout.
    std::cout << stripHTMLTags(s) << std::endl;
    return EXIT_SUCCESS;
  } catch(const std::runtime_error& x) {
    std::cerr << x.what() << std::endl;
    return EXIT_FAILURE;
  }
} ///:~