Skip to content

Commit 705bf83

Browse files
committed
Code style fixes
Change-Id: I402c69f8d1a9f4d8bf3515d220c2ae612d9de404
1 parent 198bb0c commit 705bf83

File tree

5 files changed

+94 -85 lines changed

TextCat.php

Lines changed: 51 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -47,17 +47,17 @@ public function setMinFreq( $minFreq ) {
4747
/**
4848
* @param string $dir
4949
*/
50-
public function __construct($dir = null) {
51-
if(empty($dir)) {
50+
public function __construct( $dir = null ) {
51+
if ( empty( $dir ) ) {
5252
$dir = __DIR__."/LM";
5353
}
5454
$this->dir = $dir;
55-
foreach(new DirectoryIterator($dir) as $file) {
56-
if(!$file->isFile()) {
55+
foreach ( new DirectoryIterator( $dir ) as $file ) {
56+
if ( !$file->isFile() ) {
5757
continue;
5858
}
59-
if($file->getExtension() == "lm") {
60-
$this->langFiles[$file->getBasename(".lm")] = $file->getPathname();
59+
if ( $file->getExtension() == "lm" ) {
60+
$this->langFiles[$file->getBasename( ".lm" )] = $file->getPathname();
6161
}
6262
}
6363
}
@@ -68,43 +68,45 @@ public function __construct($dir = null) {
6868
* @param int $maxNgrams How many ngrams to use.
6969
* @return int[]
7070
*/
71-
public function createLM($text, $maxNgrams) {
71+
public function createLM( $text, $maxNgrams ) {
7272
$ngram = array();
73-
foreach(preg_split("/[{$this->wordSeparator}]+/u", $text) as $word) {
74-
if(empty($word)) {
73+
foreach ( preg_split( "/[{$this->wordSeparator}]+/u", $text ) as $word ) {
74+
if ( empty( $word ) ) {
7575
continue;
7676
}
7777
$word = "_".$word."_";
78-
$len = mb_strlen($word, "UTF-8");
79-
for($i=0;$i<$len;$i++) {
78+
$len = mb_strlen( $word, "UTF-8" );
79+
for ( $i=0;$i<$len;$i++ ) {
8080
$rlen = $len - $i;
81-
if($rlen > 4) {
82-
@$ngram[mb_substr($word, $i, 5, "UTF-8")]++;
81+
if ( $rlen > 4 ) {
82+
@$ngram[mb_substr( $word, $i, 5, "UTF-8" )]++;
8383
}
84-
if($rlen > 3) {
85-
@$ngram[mb_substr($word, $i, 4, "UTF-8")]++;
84+
if ( $rlen > 3 ) {
85+
@$ngram[mb_substr( $word, $i, 4, "UTF-8" )]++;
8686
}
87-
if($rlen > 2) {
88-
@$ngram[mb_substr($word, $i, 3, "UTF-8")]++;
87+
if ( $rlen > 2 ) {
88+
@$ngram[mb_substr( $word, $i, 3, "UTF-8" )]++;
8989
}
90-
if($rlen > 1) {
91-
@$ngram[mb_substr($word, $i, 2, "UTF-8")]++;
90+
if ( $rlen > 1 ) {
91+
@$ngram[mb_substr( $word, $i, 2, "UTF-8" )]++;
9292
}
93-
@$ngram[mb_substr($word, $i, 1, "UTF-8")]++;
93+
@$ngram[mb_substr( $word, $i, 1, "UTF-8" )]++;
9494
}
9595
}
96-
if($this->minFreq) {
96+
if ( $this->minFreq ) {
9797
$min = $this->minFreq;
98-
$ngram = array_filter($ngram, function ($v) use($min) { return $v > $min; });
98+
$ngram = array_filter( $ngram, function ( $v ) use( $min ) { return $v > $min;
99+
100+
} );
99101
}
100-
uksort( $ngram, function($k1, $k2) use($ngram) {
101-
if($ngram[$k1] == $ngram[$k2]) {
102-
return strcmp($k1, $k2);
102+
uksort( $ngram, function( $k1, $k2 ) use( $ngram ) {
103+
if ( $ngram[$k1] == $ngram[$k2] ) {
104+
return strcmp( $k1, $k2 );
103105
}
104106
return $ngram[$k2] - $ngram[$k1];
105-
});
106-
if(count($ngram) > $maxNgrams) {
107-
array_splice($ngram, $maxNgrams);
107+
} );
108+
if ( count( $ngram ) > $maxNgrams ) {
109+
array_splice( $ngram, $maxNgrams );
108110
}
109111
return $ngram;
110112
}
@@ -114,9 +116,9 @@ public function createLM($text, $maxNgrams) {
114116
* @param string $langFile
115117
* @return int[] Language file data
116118
*/
117-
public function loadLanguageFile($langFile) {
119+
public function loadLanguageFile( $langFile ) {
118120
include $langFile;
119-
array_splice($ranks, $this->maxNgrams);
121+
array_splice( $ranks, $this->maxNgrams );
120122
return $ranks;
121123
}
122124

@@ -125,15 +127,17 @@ public function loadLanguageFile($langFile) {
125127
* @param int[] $ngrams
126128
* @param string $outfile Output filename
127129
*/
128-
public function writeLanguageFile($ngrams, $outfile) {
129-
$out = fopen($outfile, "w");
130+
public function writeLanguageFile( $ngrams, $outfile ) {
131+
$out = fopen( $outfile, "w" );
130132
// write original array as "$ngrams"
131-
fwrite($out, '<?php $ngrams = ' . var_export($ngrams, true) . ";\n");
133+
fwrite( $out, '<?php $ngrams = ' . var_export( $ngrams, true ) . ";\n" );
132134
// write reduced array as "$ranks"
133135
$rank = 1;
134-
$ranks = array_map(function ($x) use(&$rank) { return $rank++; }, $ngrams);
135-
fwrite($out, '$ranks = ' . var_export($ranks, true) . ";\n");
136-
fclose($out);
136+
$ranks = array_map( function ( $x ) use( &$rank ) { return $rank++;
137+
138+
}, $ngrams );
139+
fwrite( $out, '$ranks = ' . var_export( $ranks, true ) . ";\n" );
140+
fclose( $out );
137141
}
138142

139143
/**
@@ -143,29 +147,29 @@ public function writeLanguageFile($ngrams, $outfile) {
143147
* @return int[] Array with keys of language names and values of score.
144148
* Sorted by ascending score, with first result being the best.
145149
*/
146-
public function classify($text, $candidates = null) {
147-
$inputgrams = array_keys($this->createLM($text, $this->maxNgrams));
148-
if($candidates) {
150+
public function classify( $text, $candidates = null ) {
151+
$inputgrams = array_keys( $this->createLM( $text, $this->maxNgrams ) );
152+
if ( $candidates ) {
149153
// flip for more efficient lookups
150-
$candidates = array_flip($candidates);
154+
$candidates = array_flip( $candidates );
151155
}
152156
$results = array();
153-
foreach($this->langFiles as $language => $langFile) {
154-
if($candidates && !isset($candidates[$language])) {
157+
foreach ( $this->langFiles as $language => $langFile ) {
158+
if ( $candidates && !isset( $candidates[$language] ) ) {
155159
continue;
156160
}
157-
$ngrams = $this->loadLanguageFile($langFile);
161+
$ngrams = $this->loadLanguageFile( $langFile );
158162
$p = 0;
159-
foreach($inputgrams as $i => $ingram) {
160-
if( !empty($ngrams[$ingram]) ) {
161-
$p += abs($ngrams[$ingram] - $i);
163+
foreach ( $inputgrams as $i => $ingram ) {
164+
if ( !empty( $ngrams[$ingram] ) ) {
165+
$p += abs( $ngrams[$ingram] - $i );
162166
} else {
163167
$p += $this->maxNgrams;
164168
}
165169
}
166170
$results[$language] = $p;
167171
}
168-
asort($results);
172+
asort( $results );
169173
return $results;
170174
}
171175
}

catus.php

Lines changed: 28 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,9 @@
44
*/
55
require_once __DIR__.'/TextCat.php';
66

7-
$options = getopt('a:c:d:f:t:u:l:h');
7+
$options = getopt( 'a:c:d:f:t:u:l:h' );
88

9-
if(isset($options['h'])) {
9+
if ( isset( $options['h'] ) ) {
1010
$help = <<<HELP
1111
{$argv[0]} [-d Dir] [-a Int] [-f Int] [-l Text] [-t Int] [-u Float]
1212
@@ -37,52 +37,54 @@
3737
3838
HELP;
3939
echo $help;
40-
exit(0);
40+
exit( 0 );
4141
}
4242

43-
if(!empty($options['d'])) {
43+
if ( !empty( $options['d'] ) ) {
4444
$dir = $options['d'];
4545
} else {
46-
$dir = dirname(__FILE__)."/LM";
46+
$dir = __DIR__."/LM";
4747
}
4848

49-
$cat = new TextCat($dir);
49+
$cat = new TextCat( $dir );
5050

51-
if(!empty($options['t'])) {
52-
$cat->setMaxNgrams(intval($options['t']));
51+
if ( !empty( $options['t'] ) ) {
52+
$cat->setMaxNgrams( intval( $options['t'] ) );
5353
}
54-
if(!empty($options['f'])) {
55-
$cat->setMinFreq(intval($options['f']));
54+
if ( !empty( $options['f'] ) ) {
55+
$cat->setMinFreq( intval( $options['f'] ) );
5656
}
5757

58-
$input = isset($options['l']) ? $options['l'] : file_get_contents("php://stdin");
59-
if(!empty($options['c'])) {
60-
$result = $cat->classify($input, explode(",", $options['c']));
58+
$input = isset( $options['l'] ) ? $options['l'] : file_get_contents( "php://stdin" );
59+
if ( !empty( $options['c'] ) ) {
60+
$result = $cat->classify( $input, explode( ",", $options['c'] ) );
6161
} else {
62-
$result = $cat->classify($input);
62+
$result = $cat->classify( $input );
6363
}
6464

65-
if(empty($result)) {
65+
if ( empty( $result ) ) {
6666
echo "No match found.\n";
67-
exit(1);
67+
exit( 1 );
6868
}
6969

70-
if(!empty($options['u'])) {
71-
$max = reset($result) * $options['u'];
70+
if ( !empty( $options['u'] ) ) {
71+
$max = reset( $result ) * $options['u'];
7272
} else {
73-
$max = reset($result) * 1.05;
73+
$max = reset( $result ) * 1.05;
7474
}
7575

76-
if(!empty($options['a'])) {
76+
if ( !empty( $options['a'] ) ) {
7777
$top = $options['a'];
7878
} else {
7979
$top = 10;
8080
}
81-
$result = array_filter($result, function ($res) use($max) { return $res < $max; });
82-
if($result && count($result) <= $top) {
83-
echo join(" or ", array_keys($result)) . "\n";
84-
exit(0);
81+
$result = array_filter( $result, function ( $res ) use( $max ) { return $res < $max;
82+
83+
} );
84+
if ( $result && count( $result ) <= $top ) {
85+
echo join( " or ", array_keys( $result ) ) . "\n";
86+
exit( 0 );
8587
} else {
8688
echo "Can not determine language.\n";
87-
exit(1);
88-
}
89+
exit( 1 );
90+
}

composer.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
"test": [
1919
"parallel-lint . --exclude vendor",
2020
"phpunit tests/",
21-
"phpcs -p -s ."
21+
"phpcs -p -s"
2222
]
2323
}
2424
}

felis.php

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -9,19 +9,19 @@
99
// TODO: add option to control model ngram count
1010
$maxNgrams = 4000;
1111

12-
if($argc != 3) {
13-
die("Use $argv[0] INPUTDIR OUTPUTDIR\n");
12+
if ( $argc != 3 ) {
13+
die( "Use $argv[0] INPUTDIR OUTPUTDIR\n" );
1414
}
15-
if(!file_exists($argv[2])) {
16-
mkdir($argv[2], 0755, true);
15+
if ( !file_exists( $argv[2] ) ) {
16+
mkdir( $argv[2], 0755, true );
1717
}
18-
$cat = new TextCat($argv[2]);
18+
$cat = new TextCat( $argv[2] );
1919

20-
foreach(new DirectoryIterator($argv[1]) as $file) {
21-
if(!$file->isFile()) {
20+
foreach ( new DirectoryIterator( $argv[1] ) as $file ) {
21+
if ( !$file->isFile() ) {
2222
continue;
2323
}
24-
$ngrams = $cat->createLM(file_get_contents($file->getPathname()), $maxNgrams);
25-
$cat->writeLanguageFile($ngrams, $argv[2] . "/" . $file->getBasename(".txt") . ".lm");
24+
$ngrams = $cat->createLM( file_get_contents( $file->getPathname() ), $maxNgrams );
25+
$cat->writeLanguageFile( $ngrams, $argv[2] . "/" . $file->getBasename( ".txt" ) . ".lm" );
2626
}
27-
exit(0);
27+
exit( 0 );

phpcs.xml

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,11 @@
11
<?xml version="1.0"?>
22
<ruleset name="textcat">
3-
<rule ref="vendor/mediawiki/mediawiki-codesniffer/MediaWiki"/>
43
<file>.</file>
54
<exclude-pattern>vendor</exclude-pattern>
65
<exclude-pattern>LM</exclude-pattern>
76
<exclude-pattern>tests</exclude-pattern>
7+
<rule ref="vendor/mediawiki/mediawiki-codesniffer/MediaWiki"/>
8+
<rule ref="Generic.PHP.NoSilencedErrors.Discouraged">
9+
<exclude name="Generic.PHP.NoSilencedErrors.Discouraged"/>
10+
</rule>
811
</ruleset>

0 commit comments

Comments
 (0)