The qualification round is over! I found myself reading other people's code, and I decided that for the next rounds I'd need some common code to cut down development time. There is no point in keeping this a secret, because anyone can read other people's code, and *a lot of far better, really good code* is out there if you check the submissions of the top 20 or so.
I plan to stick with Python; however, if speed of execution becomes crucial, this will be my plan B (my plan A - using PyPy ;) ).
I am treating each test case as a separate object, and since there is usually no common data shared between test cases, they can and should be parallelized. Pthreads are doing the job well enough, though I will also try something with OpenMP's #pragma directive.
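Just to make the OpenMP idea concrete, here is a minimal sketch of what that variant might look like. It assumes the same TestCase class from the listing below, and the runAll() helper is made up for illustration; compile with g++ -fopenmp.

#include <vector>

// Hypothetical helper: each iteration only touches its own TestCase,
// so a single pragma is enough to split the loop across threads.
void runAll(std::vector<TestCase*> &tarr){
    #pragma omp parallel for
    for(int i = 0; i < (int)tarr.size(); i++){
        tarr[i]->doWork();
    }
}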
Note that while using pthreads and treating each test case as completely independent gives us some gain, memoization becomes a little more painful.
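For example, if the test cases want to share a memo table, that table now has to be guarded by a lock. A minimal sketch, assuming a pthread mutex around a std::map; the memoized() helper is made up for illustration and is not part of the listing below.

#include <map>
#include <pthread.h>

// Shared memo table and the mutex that guards it.
std::map<int,int> memo;
pthread_mutex_t memo_lock = PTHREAD_MUTEX_INITIALIZER;

int memoized(int key, int (*compute)(int)){
    // Check the cache under the lock.
    pthread_mutex_lock(&memo_lock);
    std::map<int,int>::iterator it = memo.find(key);
    if(it != memo.end()){
        int cached = it->second;
        pthread_mutex_unlock(&memo_lock);
        return cached;
    }
    pthread_mutex_unlock(&memo_lock);

    // Do the expensive work outside the lock.
    int value = compute(key);

    // Store it; if another thread raced us, it computed the same value anyway.
    pthread_mutex_lock(&memo_lock);
    memo[key] = value;
    pthread_mutex_unlock(&memo_lock);
    return value;
}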
I also couldn't resist the urge to implement an ugly forAll kind of function. The doWork() here is just something to test the performance gain. Run this using perf on Linux, read the code, and do comment if you have any suggestions - or I'll simply hunt your code down and read and learn from it ;)
#include <iostream>
#include <vector>
#include <pthread.h>
using namespace std;

class TestCase{
private:
    int begin, end, result;
public:
    TestCase(){ begin = end = result = 0; }
    TestCase(int x, int y): begin(x), end(y), result(0) {}
    void doWork();
    friend ostream & operator << (ostream &out1, TestCase &t1);
    friend istream & operator >> (istream &in1, TestCase &t1);
};

// Dummy workload: just burns CPU so the threading gain is measurable.
void TestCase::doWork(){
    for(int i = 0; i < begin; i++){
        for(int j = 0; j < end*10; j++){
            for(int k = 0; k < begin; k++){
                result += 1;
            }
        }
    }
}

ostream & operator << (ostream &out1, TestCase &t1){
    out1 << t1.result;
    return out1;
}

istream & operator >> (istream &in1, TestCase &t1){
    in1 >> t1.begin >> t1.end;
    return in1;
}

// The ugly forAll: applies func to every test case, sequentially.
inline void forAllTestCases(const int &num_testcases, vector<TestCase *> &tarr, void (*func)(TestCase *tptr)){
    int i = 0;
    while(num_testcases - i){
        func(tarr[i++]);
    }
}

void try1(TestCase *tptr){
    tptr->doWork();
}

// Thread entry point: unpack the TestCase pointer and run its work.
void *doWork(void *threadArg){
    TestCase *tmp = reinterpret_cast<TestCase *>(threadArg);
    tmp->doWork();
    return NULL;
}

int main(){
    int num_testcases, i;
    vector<TestCase *> tarr;
    cin >> num_testcases;

    // Read the input - I usually redirect stdin from a file.
    TestCase *tptr;
    for(i = 0; i < num_testcases; i++){
        tptr = new TestCase;
        cin >> *tptr;
        tarr.push_back(tptr);
    }

    // One joinable thread per test case.
    vector<pthread_t> mythreads(num_testcases);
    pthread_attr_t attr;
    pthread_attr_init(&attr);
    pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    void *status;
    int rc;
    for(i = 0; i < num_testcases; i++){
        rc = pthread_create(&mythreads[i], &attr, doWork, (void *) tarr[i]);
    }
    pthread_attr_destroy(&attr);
    //forAllTestCases(num_testcases, tarr, &try1);  // the sequential version, for comparison

    // Print the output in input order, joining each thread first.
    for(i = 0; i < num_testcases; i++){
        rc = pthread_join(mythreads[i], &status);
        cout << *tarr[i] << endl;
        delete tarr[i];
    }
    return 0;
}
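To build and profile it, something like this should do (assuming the file is saved as par.cpp and perf is installed):

g++ -O2 -pthread -o par par.cpp
perf stat ./par < input.txt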
D/sir,
what's the use of threads?
If you have a multicore processor, this will use all your cores, whereas a version without threads uses only one. So with two cores your program runs roughly twice as fast, and with a quad core the time drops to roughly a quarter. Even if the algorithm is bad, for large inputs you have a chance of scraping by.