とりあえず、データ取得と簡単な加工ツールができた。いままでRSIはここ(http://kaburobo.xii.jp/yh/html2/rsi.html)に頼ってたので、一日遅れになってたけど、これからは当日の仕事後に見られるようになる。

必要なものは cygwin、JDK、JRE、Excel または OOo。

あと株式銘柄コード一覧を下からとってきてコードだけの一覧(code.txt)にする。
http://www.geocities.co.jp/WallStreet-Stock/9256/data2010.htm

[使い方]

・データ取得
% ./GetYahoo.sh 2010 01 15

・データ加工
直近15日間のデータを元に、銘柄コードと14日RSI、窓が開いていれば窓の幅、最終日の終値と出来高をCSV出力する。
売買がなかった日は適当に補間している。そのため RSI=0 など変な値になることもある。

% GetMadoRSI.sh

[ソースコードその他]
GetMadoRSI.sh

    • -

#!/bin/bash
# Run GetMadoRSI.exe over the newest daily data files (at most 15, named
# 20YYMMDD.txt in the current directory). The program's stdout is saved as
# <newest date>.csv and its stderr as <newest date>.err.
set -x

# Expand the glob directly instead of parsing `ls` output. With nullglob an
# unmatched pattern gives an empty array instead of the literal pattern.
# The names are fixed-width dates, so the sorted glob order is chronological.
shopt -s nullglob
ALL=(20??????.txt)
shopt -u nullglob

COUNT=${#ALL[@]}
if [ "$COUNT" -eq 0 ]; then
    echo "no data files (20??????.txt) found" >&2
    exit 1
fi

# Keep only the newest 15 files (all of them when fewer exist).
FIRST=$(( COUNT > 15 ? COUNT - 15 : 0 ))
FILE=("${ALL[@]:$FIRST}")

# Name the outputs after the newest file actually present. A fixed index of
# 14 is empty whenever fewer than 15 files exist, which would send the
# results to ".csv" / ".err".
LAST=${FILE[${#FILE[@]}-1]}
RESULT_NAME=${LAST%.txt}
./GetMadoRSI.exe "${FILE[@]}" 2> "$RESULT_NAME.err" > "$RESULT_NAME.csv"

    • -

GetYahoo.sh

    • -

#!/bin/sh
# Download one trading day's quotes for every code in code.txt by running
# the GetYahoo Java program, restarting it after a failure, then merge the
# per-code .dat files into YEARMONTHDAY.txt and archive them.
#
# usage: GetYahoo.sh YEAR MONTH DAY      (e.g. GetYahoo.sh 2010 01 15)

if [ $# -lt 3 ]; then
    echo "usage: $0 YEAR MONTH DAY" >&2
    exit 1
fi

# Print the highest stock code that already has a .dat file, or nothing
# when no .dat file exists yet.
last_code() {
    ls *.dat 2>/dev/null | sort | tail -1 | sed -e 's/\.dat//'
}

START=`last_code`

# GetYahoo deletes this marker only after it has worked through the whole
# of code.txt; while the marker exists the download is incomplete.
echo > GetStart

while true
do
    # $START is deliberately unquoted: when it is empty the argument must
    # vanish so that GetYahoo falls back to its default start code.
    java GetYahoo "$1" "$2" "$3" $START
    if [ ! -f GetStart ]; then
        break
    fi
    # The run was cut short; resume from the last code that was saved.
    START=`last_code`
    sleep 60
done

# Without any .dat file there is nothing to merge. Stop here rather than
# leave an empty YEARMONTHDAY.txt behind for later processing.
if ! ls *.dat >/dev/null 2>&1; then
    echo "no data was downloaded for $1$2$3" >&2
    exit 1
fi

cat *.dat | sort > "$1$2$3.txt"
tar czf "$1$2$3.tgz" *.dat
rm -f *.dat

    • -

GetYahoo.java
※コード中のタグがそのまま表示に影響するため、掲載時に一部の“<”や“(”を別の文字(全角など)に置き換えている場合がある。その場合は半角の“<”、“(”に戻してから使うこと。

    • -

import java.net.*;
import java.io.*;

/**
 * Downloads one day's quote page for every stock code listed in
 * {@code code.txt} and writes the numbers found on each page to
 * {@code <code>.dat} as a single tab-separated line.
 *
 * <p>Usage: {@code java GetYahoo YEAR MONTH DAY [START_CODE]}. Codes
 * numerically below START_CODE are skipped, which lets a wrapper resume an
 * interrupted run. The marker file {@code GetStart} is deleted only when
 * every code has been processed; if an I/O error ends the run early the
 * marker is left in place.
 */
class GetYahoo {
    static final String strURL = "http://table.yahoo.co.jp/t?";

    /** Marker file removed once the whole code list has been processed. */
    private static final String MARKER_FILE = "GetStart";

    public static void main(String[] args) throws IOException {
        if (args.length < 3) {
            System.err.println("usage: java GetYahoo YEAR MONTH DAY [START_CODE]");
            System.exit(1);
        }
        String year = args[0];
        String month = args[1];
        String day = args[2];

        String start = "0000";
        if (args.length == 4)
            start = args[3];
        // Parsed once here instead of once per line of code.txt.
        int startCode = Integer.parseInt(start);

        BufferedReader codes = new BufferedReader(
                new InputStreamReader(new FileInputStream("code.txt")));
        try {
            String raw;
            while ((raw = codes.readLine()) != null) {
                String code = raw.trim();
                // A blank (e.g. trailing) line must not abort the run.
                if (code.length() == 0)
                    continue;
                int codeNum;
                try {
                    codeNum = Integer.parseInt(code);
                } catch (NumberFormatException e) {
                    System.err.println("skipping malformed line in code.txt: " + raw);
                    continue;
                }
                if (codeNum < startCode)
                    continue;
                fetch(code, year, month, day);
            }
        } finally {
            codes.close();
        }

        // Reached only when every code was handled without an exception.
        new File(MARKER_FILE).delete();
    }

    /**
     * Fetches the quote page of one code for the given day and, if any
     * numbers were extracted, writes them to {@code <code>.dat}.
     */
    private static void fetch(String code, String year, String month, String day)
            throws IOException {
        String url = strURL
                + "c=" + year + "&a=" + month + "&b=" + day
                + "&f=" + year + "&d=" + month + "&e=" + day
                + "&s=" + code + "&y=0&z=&x=s";
        System.out.println(url);
        URL targetURL = new URL(url);

        // Pause between requests.
        try {
            Thread.sleep(500);
        } catch (InterruptedException e) {
            // Keep the interrupt visible and stop instead of carrying on
            // without the pause.
            Thread.currentThread().interrupt();
            throw new InterruptedIOException("interrupted while waiting between requests");
        }

        InputStream in0 = targetURL.openStream();
        try {
            BufferedReader in =
                    new BufferedReader(new InputStreamReader(in0, "JISAutoDetect"));
            StringBuilder result = new StringBuilder();
            String line;
            while ((line = in.readLine()) != null) {
                String num = extractNumber(line);
                if (num.length() > 0)
                    result.append(num).append("\t");
            }
            if (result.length() > 0)
                writeRecord(code, result.toString());
        } finally {
            System.out.println(code + "==========FINISH==============");
            in0.close();
        }
    }

    /**
     * Returns the digits of a table-cell line, or "" when the line is not
     * an accepted cell. A line is accepted only if it starts with one of
     * the two cell prefixes and, after removing digits and commas, exactly
     * the expected number of characters remain (31 for the bold form, 24
     * for the plain form). Commas are dropped from the returned value.
     */
    private static String extractNumber(String line) {
        int expectedRest;
        if (line.startsWith("<td><small><b>")) {
            expectedRest = 31;
        } else if (line.startsWith("<td><small>")) {
            expectedRest = 24;
        } else {
            return "";
        }
        String digitsAndCommas = line.replaceAll("[^0-9,]", "");
        if (line.length() - digitsAndCommas.length() != expectedRest)
            return "";
        return line.replaceAll("[^0-9]", "");
    }

    /** Writes "code TAB values" as one line to {@code <code>.dat}. */
    private static void writeRecord(String code, String values) throws IOException {
        PrintStream ps = new PrintStream(new FileOutputStream(code + ".dat"));
        try {
            ps.println(code + "\t" + values);
        } finally {
            // Closing the PrintStream also closes the underlying file stream.
            ps.close();
        }
    }
}

    • -

GetMadoRSI.c

    • -

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

struct tbl {
int code;
int start;
int max;
int min;
int end;
int taka;
double age;
double sage;
double rsi;
int mado;
int file_num;
};

/*
 * Helpers that walk the global read buffer `buf` through the global cursor
 * `offset`. Within this file main() calls only get_num(); the other three
 * are defined further down but not called from main().
 */
static void skip_first();           /* place the cursor just after the first line */
static void skip_last();            /* advance the cursor past the end of the current line */
static int get_num();               /* read the next run of decimal digits; -1 at end of data */
static void get_name( char * );     /* copy the next tab-delimited field into a 256-byte buffer */

/* Parser state shared by main() and the helper functions. */
int offset;         /* current read position inside buf */
int off_max;        /* number of bytes read() placed in buf for the current file */
char buf[204800];   /* contents of the data file currently being parsed */
int file_num;       /* number of input files given on the command line (argc - 1) */

/* Dimensions of the in-memory price table. */
enum {
    MAX_DAY_FILES = 256,    /* capacity of tbl_p[] in main() */
    CODE_SLOTS = 10000      /* a stock code indexes its row directly: 0..9999 */
};

/*
 * Release the first `count` per-day tables allocated by main().
 */
static void free_day_tables( struct tbl *tables[], int count )
{
    int i;

    for( i = 0; i < count; i++ ) {
        free( tables[i] );
        tables[i] = NULL;
    }
}

/*
 * Read one daily data file into the global buffer and store its records.
 *
 * Every record is a run of seven numbers: code, begin, max, min, close,
 * trading volume and adjusted close. The prices go into day[code]; the
 * adjusted close is stored as the closing price and the raw close is
 * discarded. first_day[code] (the table of the first input file) records
 * that the code exists and counts the files in which it appeared.
 *
 * Returns 0 on success, -1 if the file cannot be opened or read.
 */
static int load_day_file( const char *path, struct tbl *first_day,
                          struct tbl *day )
{
    int fd;
    int code;

    fd = open( path, O_RDONLY );
    if( fd < 0 ) {      /* 0 is a valid descriptor; only -1 signals failure */
        fprintf( stderr, "file: %s open error.\n", path );
        return( -1 );
    }
    memset( buf, 0x00, sizeof(buf) );
    off_max = read( fd, buf, sizeof(buf) );
    if( off_max <= 0 ) {
        /* report before close(), which may change errno */
        fprintf( stderr,
                 "file %s read error(%s)\n", path, strerror(errno) );
        close( fd );
        return( -1 );
    }
    close( fd );
    if( off_max == (int)sizeof(buf) ) {
        fprintf( stderr,
                 "file %s fills the whole read buffer; "
                 "data beyond %d bytes is ignored\n", path, off_max );
    }

#ifdef DEBUG
    printf( "read %s: %d(bytes)\n", path, off_max );
#endif

    offset = 0;
    while( offset < off_max ) {
        int field[6];
        int k;

        code = get_num();
        if( code < 0 )
            break;
        if( code == 4 )
            fprintf( stderr, "%s offset=%d\n", path, offset );
        if( code >= CODE_SLOTS ) {
            /* would index past the table: drop the rest of this line */
            fprintf( stderr, "file %s: code %d out of range, line skipped\n",
                     path, code );
            if( buf[offset - 1] != 0x0a ) {
                while( offset < off_max && buf[offset] != 0x0a )
                    offset++;
            }
            continue;
        }

        /* begin, max, min, close, trading volume, adjusted close */
        for( k = 0; k < 6; k++ )
            field[k] = get_num();
        if( field[5] < 0 ) {
            /* get_num() reports -1 only when the data ran out */
            fprintf( stderr,
                     "file %s: incomplete record for code %d ignored\n",
                     path, code );
            break;
        }

#ifdef DEBUG
        printf( "code: %d\n", code );
        printf( "begin: %d\n", field[0] );
        printf( "max: %d\n", field[1] );
        printf( "min: %d\n", field[2] );
        printf( "end: %d\n", field[3] );
#endif

        first_day[code].code = code;
        day[code].code = code;
        first_day[code].file_num++;

        day[code].start = field[0];
        day[code].max = field[1];
        day[code].min = field[2];
        day[code].taka = field[4];
        /* the closing price used from here on is the adjusted close */
        day[code].end = field[5];
    }
    return( 0 );
}

/*
 * Usage: GetMadoRSI FILE...   (daily data files, oldest first)
 *
 * Reads the daily files, fills in days without trading, and for every stock
 * code that appeared in at least one file prints one CSV line on stdout:
 * code, RSI over the given files, gap width between the last two days,
 * last day's closing price, last day's volume, and the number of files in
 * which the code was missing. Diagnostics go to stderr.
 *
 * Returns 0 on success and -1 on a usage, allocation, open or read error.
 */
extern
int
main( int argc, char *argv[] )
{
    struct tbl *tbl_p[MAX_DAY_FILES];
    int i, j;
    int diff;
    int code;

    if( argc < 2 ) {
        fprintf( stderr, "at least 1 data file must be given\n" );
        return( -1 );
    }
    if( argc - 1 > MAX_DAY_FILES ) {
        fprintf( stderr, "too many data files (max %d)\n", MAX_DAY_FILES );
        return( -1 );
    }
    file_num = argc - 1;

    /* one zero-filled table per input file */
    for( i = 0; i < file_num; i++ ) {
        tbl_p[i] = calloc( CODE_SLOTS, sizeof *tbl_p[i] );
        if( NULL == tbl_p[i] ) {
            fprintf( stderr, "malloc error(%s)\n", strerror(errno) );
            free_day_tables( tbl_p, i );
            return( -1 );
        }
    }

    /*
     * read data files, and set number to tbl_p[file#]
     */
    for( i = 0; i < file_num; i++ ) {
        if( load_day_file( argv[i+1], tbl_p[0], tbl_p[i] ) < 0 ) {
            free_day_tables( tbl_p, file_num );
            return( -1 );
        }
    }

    /*
     * Before computing the RSI, fill in the days on which a code was not
     * traded (closing price still 0).
     */
    for( i = 0; i < file_num; i++ ) {
        for( code = 0; code < CODE_SLOTS; code++ ) {
            if( 0 != tbl_p[i][code].end )
                continue;
            if( 0 == i ) {
                /* first day: take the close of the first day that traded */
                for( j = 1; j < file_num; j++ ) {
                    if( tbl_p[j][code].end != 0 ) {
                        tbl_p[i][code].start = tbl_p[j][code].end;
                        tbl_p[i][code].max = tbl_p[j][code].end;
                        tbl_p[i][code].min = tbl_p[j][code].end;
                        tbl_p[i][code].end = tbl_p[j][code].end;
                        break;
                    }
                }
            } else {
                /* later days: carry the previous day's close forward */
                tbl_p[i][code].start = tbl_p[i-1][code].end;
                tbl_p[i][code].max = tbl_p[i-1][code].end;
                tbl_p[i][code].min = tbl_p[i-1][code].end;
                tbl_p[i][code].end = tbl_p[i-1][code].end;
            }
        }
    }

    /*
     * calculate RSI: rises / (rises + falls) * 100 over the closing prices
     */
    for( code = 0; code < CODE_SLOTS; code++ ) {
        if( 0 == tbl_p[0][code].code )
            continue;
        for( i = 0; i < file_num - 1; i++ ) {
            diff = tbl_p[i+1][code].end - tbl_p[i][code].end;
            if( diff > 0 )
                tbl_p[0][code].age += diff;
            else
                tbl_p[0][code].sage -= diff;
        }
        if( 0 == tbl_p[0][code].age && 0 == tbl_p[0][code].sage ) {
            /* no price movement at all: avoid dividing by zero */
            tbl_p[0][code].rsi = 0;
            continue;
        }
        tbl_p[0][code].rsi = tbl_p[0][code].age /
            (tbl_p[0][code].age + tbl_p[0][code].sage) * 100;
    }

    /* dump the daily figures of every code whose RSI came out as 0 */
    for( code = 0; code < CODE_SLOTS; code++ ) {
        if( 0 == tbl_p[0][code].code )
            continue;
        if( 0 != tbl_p[0][code].rsi )
            continue;
        fprintf( stderr, "debug: code[%d] RSI=0\n", code );
        for( i = 0; i < file_num; i++ ) {
            fprintf( stderr,
                " day[%d]: start=%d max=%d min=%d end=%d age=%3.3f sage=%3.3f\n",
                i,
                tbl_p[i][code].start,
                tbl_p[i][code].max,
                tbl_p[i][code].min,
                tbl_p[i][code].end,
                tbl_p[i][code].age,
                tbl_p[i][code].sage );
        }
    }

    /*
     * mado check: a gap exists when the last day's low lies above the
     * previous day's high. It needs two days, so it is skipped when only
     * one file was given.
     */
    if( file_num >= 2 ) {
        for( code = 0; code < CODE_SLOTS; code++ ) {
            if( tbl_p[file_num-1][code].min <= 0 )
                continue;
            if( tbl_p[file_num-2][code].max <= 0 )
                continue;
            diff = tbl_p[file_num-1][code].min - tbl_p[file_num-2][code].max;
            if( diff > 0 )
                tbl_p[0][code].mado = diff;
        }
    }

    /*
     * print last result
     */
    printf( "コード, RSI, 窓幅, 終値, 出来高, 未売買日数\n" );
    for( code = 0; code < CODE_SLOTS; code++ ) {
        if( 0 == tbl_p[0][code].code )
            continue;
        if( tbl_p[0][code].file_num != file_num )
            fprintf( stderr, "code[%d] missing some data\n", code );
        printf( "%d, %3.3lf, %d, %d, %d, %d\n",
            tbl_p[0][code].code,
            tbl_p[0][code].rsi,
            tbl_p[0][code].mado,
            tbl_p[file_num-1][code].end,
            tbl_p[file_num-1][code].taka,
            file_num - tbl_p[0][code].file_num
            );
    }

    free_day_tables( tbl_p, file_num );
    return( 0 );
}

static void skip_first()
{
for( offset = 0; ; offset++ ) {
if( buf[offset] == 0x0a )
break;
}
offset++;
return;
}

static void skip_last()
{
for( ;; offset++ ) {
if( 0x0a == buf[offset] || 0 == buf[offset] )
break;
}
offset++;
return;
}

/*
 * Read the next run of decimal digits from buf, starting at the cursor.
 *
 * Non-digit bytes before the number are skipped, and the single non-digit
 * byte that ends the number is consumed. A number that ends exactly at the
 * end of the data is still returned. Values too large for an int saturate
 * at INT_MAX.
 *
 * Returns the number, or -1 when the data ends before any digit is found.
 */
static int get_num(void)
{
    int value = 0;
    int have_digit = 0;

    while( offset < off_max ) {
        /* isdigit() needs a value representable as unsigned char */
        unsigned char ch = (unsigned char)buf[offset++];

        if( isdigit( ch ) ) {
            int d = ch - '0';

            if( value > (INT_MAX - d) / 10 )
                value = INT_MAX;        /* would overflow: saturate */
            else
                value = value * 10 + d;
            have_digit = 1;
        } else if( have_digit ) {
            return( value );
        }
    }
    return( have_digit ? value : -1 );
}

static void get_name( char *ret_buf )
{
int letter_found;

memset( ret_buf, 0x00, 256 );
letter_found = 0;
for(;;) {
if( '\t' == buf[offset] ) {
if( 0 == letter_found ) {
offset++;
continue;
}
offset++;
return;
}
ret_buf[letter_found++] = buf[offset++];
}
}

    • -