UdgerParser.cs
1/*
2 UdgerParser - Local parser lib
3
4 UdgerParser class parses useragent strings based on a database downloaded from udger.com
5
6
7 author The Udger.com Team (info@udger.com)
8 copyright Copyright (c) Udger s.r.o.
9 license GNU Lesser General Public License
10 link https://udger.com/products/local_parser
11
12 Third Party lib:
13 ADO.NET Data Provider for SQLite - http://www.sqlite.org/ - Public domain
14 RegExpPerl.cs - https://github.com/DEVSENSE/Phalanger/blob/master/Source/ClassLibrary/RegExpPerl.cs - Apache License Version 2.0
15 */
16
17using System;
18using System.Text;
19using System.Data;
20using System.IO;
21using System.Collections.Generic;
22using System.Runtime.CompilerServices;
23using System.Text.RegularExpressions;
24
25namespace Udger.Parser
26{
27 public class UdgerParser
28 {
29
30 public UserAgent userAgent { get; private set; }
31 public IPAddress ipAddress { get; private set; }
32
33 public string ip { get; set; }
34 public string ua { get; set; }
35
36 #region private Variables
/// <summary>
/// One entry of a regex lookup list: the row id, two optional pre-filter
/// word ids and the pattern text.
/// </summary>
private struct IdRegString
{
    // Row identifier returned to the caller when the pattern matches.
    public int id;

    // Pre-filter word ids; findIdFromList treats 0 as "no word required".
    public int wordId1, wordId2;

    // Pattern text, stored as a string (not as a compiled Regex).
    public string pattern;
}
44
45 private LRUCache<string, UserAgent> cache;
46 private bool useCache;
47 private DataReader dt;
48 private static WordDetector clientWordDetector;
49 private static WordDetector deviceWordDetector;
50 private static WordDetector osWordDetector;
51
52 private static List<IdRegString> clientRegstringList;
53 private static List<IdRegString> osRegstringList;
54 private static List<IdRegString> deviceRegstringList;
55
56 private readonly Dictionary<string, Regex> regexCache = new Dictionary<string, Regex>();
57 private readonly Dictionary<string, string> preparedStmtMap = new Dictionary<string, string>();
58 #endregion
59
60 #region Constructor
/// <summary>
/// Creates a parser with the LRU cache enabled and a capacity of 10000 entries.
/// </summary>
public UdgerParser()
    : this(true, 10000)
{
    // All initialisation is done by the (bool, int) constructor.
}
/// <summary>
/// Creates a parser with the LRU cache enabled and the given capacity.
/// </summary>
/// <param name="LRUCashCapacity">Capacity passed to the LRU cache.</param>
public UdgerParser(int LRUCashCapacity = 10000)
    : this(true, LRUCashCapacity)
{
    // All initialisation is done by the (bool, int) constructor.
}
/// <summary>
/// Creates a parser, optionally with an LRU cache for parsed user agents.
/// </summary>
/// <param name="useLRUCash">Whether parsed user agents are cached.</param>
/// <param name="LRUCashCapacity">Capacity passed to the LRU cache when it is enabled.</param>
public UdgerParser(bool useLRUCash = true, int LRUCashCapacity = 10000)
{
    ua = string.Empty;
    ip = string.Empty;
    useCache = useLRUCash;

    // The cache is only allocated on request; otherwise the field stays null.
    if (useCache)
    {
        cache = new LRUCache<string, UserAgent>(LRUCashCapacity);
    }

    dt = new DataReader();
}
102 #endregion
103
104 #region setParser method
/// <summary>
/// Sets the data directory and selects the default database file
/// <c>udgerdb_v3.dat</c> inside it.
/// </summary>
/// <param name="dataDir">Directory that contains the database file.</param>
/// <exception cref="DirectoryNotFoundException">The directory does not exist.</exception>
/// <exception cref="FileNotFoundException">The database file is not in the directory.</exception>
public void SetDataDir(string dataDir)
{
    // Directory.Exists returns false for null/empty input, so those are rejected here too.
    if (!Directory.Exists(dataDir))
        throw new DirectoryNotFoundException("Data dir not found");

    dt.data_dir = dataDir;
    // Path.Combine uses the platform's separator instead of a hard-coded backslash.
    dt.DataSourcePath = Path.Combine(dataDir, "udgerdb_v3.dat");

    if (!File.Exists(dt.DataSourcePath))
        throw new FileNotFoundException("Data file udgerdb_v3.dat not found", dt.DataSourcePath);
}
/// <summary>
/// Sets the data directory and the database file name inside it.
/// </summary>
/// <param name="dataDir">Directory that contains the database file.</param>
/// <param name="fileName">Name of the database file.</param>
/// <exception cref="DirectoryNotFoundException">The directory does not exist.</exception>
/// <exception cref="ArgumentException"><paramref name="fileName"/> is null or empty.</exception>
/// <exception cref="FileNotFoundException">The database file is not in the directory.</exception>
public void SetDataDir(string dataDir, string fileName)
{
    // Directory.Exists returns false for null/empty input, so those are rejected here too.
    if (!Directory.Exists(dataDir))
        throw new DirectoryNotFoundException("Data dir not found");

    if (string.IsNullOrEmpty(fileName))
        throw new ArgumentException("Data file name must not be empty", "fileName");

    dt.data_dir = dataDir;
    // Path.Combine uses the platform's separator instead of a hard-coded backslash.
    // Note: a rooted fileName makes Path.Combine return fileName itself.
    dt.DataSourcePath = Path.Combine(dataDir, fileName);

    if (!File.Exists(dt.DataSourcePath))
        throw new FileNotFoundException("Data file " + fileName + " not found", dt.DataSourcePath);
}
136 #endregion
137
138 #region public method
/// <summary>
/// Parses the user agent string in <c>ua</c> and/or the IP address in <c>ip</c>.
/// Results are published through <c>userAgent</c> and <c>ipAddress</c>, which are
/// replaced with fresh objects on every call. Each input that was processed is
/// reset to an empty string afterwards.
/// </summary>
public void parse()
{
    this.ipAddress = new IPAddress();
    this.userAgent = new UserAgent();

    dt.connect(this);
    if (!dt.Connected)
        return;

    // Build the shared lookup structures only once a connection exists, so a failed
    // connection cannot leave them initialised from missing data.
    UdgerParser.initStaticStructures(dt);

    // IsNullOrEmpty also covers a null assigned through the public setter.
    if (!string.IsNullOrEmpty(this.ua))
    {
        UserAgent uaCache;
        if (useCache && cache.TryGetValue(this.ua, out uaCache))
            userAgent = uaCache;
        else
            this.parseUA(this.ua.Replace("'", "''")); // single quotes doubled for the SQL text

        // Reset on both paths so a cache hit behaves like a cache miss.
        this.ua = "";
    }

    if (!string.IsNullOrEmpty(this.ip))
    {
        this.parseIP(this.ip.Replace("'", "''"));
        this.ip = "";
    }
}
170 #endregion
171
172 #region private method
173
174 #region parse
/// <summary>
/// Fills <c>userAgent</c> with client, OS and device information for the current
/// user agent and stores the result in the cache when caching is enabled.
/// </summary>
/// <param name="_userAgent">
/// The user agent string as passed by <c>parse()</c>, i.e. with single quotes doubled.
/// </param>
private void parseUA(string _userAgent)
{
    if (string.IsNullOrEmpty(_userAgent))
        return;

    int client_id = 0;
    int client_class_id = -1;
    int os_id = 0;

    // Defaults that stay in place when nothing matches.
    userAgent.UaString = this.ua;
    userAgent.UaClass = "Unrecognized";
    userAgent.UaClassCode = "unrecognized";

    if (!dt.Connected)
        return;

    // Client
    this.processClient(_userAgent, ref os_id, ref client_id, ref client_class_id);
    // OS
    this.processOS(_userAgent, ref os_id, client_id);
    // Device
    this.processDevice(_userAgent, ref client_class_id);

    if (!string.IsNullOrEmpty(userAgent.OsFamilyCode))
    {
        this.processDeviceBrand();
    }

    // parse() looks the cache up by the raw ua string, so store under the same key.
    // Storing under the escaped string would make any UA containing a quote miss forever.
    if (this.useCache)
        cache.Set(this.ua, this.userAgent);
}
207
/// <summary>
/// Fills <c>ipAddress</c> with the IP list entry for the current address and,
/// for IPv4 addresses, with the matching data-center range.
/// </summary>
/// <param name="_ip">The IP string as passed by <c>parse()</c>.</param>
private void parseIP(string _ip)
{
    if (string.IsNullOrEmpty(_ip))
        return;

    ipAddress.Ip = this.ip;

    if (!dt.Connected)
        return;

    string normalizedIp;
    int ipVer = this.getIPAddressVersion(ip, out normalizedIp);
    if (ipVer == 0)
        return; // not a parsable IPv4/IPv6 address

    // Prefer the normalised textual form for the lookup.
    if (normalizedIp != "")
        _ip = normalizedIp;

    ipAddress.IpVer = ConvertToStr(ipVer);

    string ipQuery = @"SELECT udger_crawler_list.id as botid,ip_last_seen,ip_hostname,ip_country,ip_city,ip_country_code,ip_classification,ip_classification_code,
 name,ver,ver_major,last_seen,respect_robotstxt,family,family_code,family_homepage,family_icon,vendor,vendor_code,vendor_homepage,crawler_classification,crawler_classification_code,crawler_classification
 FROM udger_ip_list
 JOIN udger_ip_class ON udger_ip_class.id=udger_ip_list.class_id
 LEFT JOIN udger_crawler_list ON udger_crawler_list.id=udger_ip_list.crawler_id
 LEFT JOIN udger_crawler_class ON udger_crawler_class.id=udger_crawler_list.class_id
 WHERE ip=" + '"' + _ip + '"' + " ORDER BY sequence";

    DataTable ipRows = dt.selectQuery(ipQuery);
    if (ipRows != null && ipRows.Rows.Count > 0)
    {
        this.prepareIp(ipRows.Rows[0]);
    }

    if (ipVer != 4)
        return; // data-center ranges are only looked up for IPv4

    long ipLong = this.AddrToInt(_ip);
    string ipLongText = ipLong.ToString();

    DataTable dataCenterRows = dt.selectQuery(@"select name, name_code, homepage
 FROM udger_datacenter_range
 JOIN udger_datacenter_list ON udger_datacenter_range.datacenter_id = udger_datacenter_list.id
 where iplong_from <= " + ipLongText + " AND iplong_to >=" + ipLongText);

    if (dataCenterRows != null && dataCenterRows.Rows.Count > 0)
    {
        this.prepareIpDataCenter(dataCenterRows.Rows[0]);
    }
}
257 #endregion
258
259 #region process methods
260
/// <summary>
/// Resolves the operating system: first by matching the OS regex list against the
/// user agent, otherwise (when a client id is known) through the client-to-OS query.
/// </summary>
/// <param name="uaString">User agent string to match.</param>
/// <param name="os_id">Forwarded to <c>prepareOs</c>.</param>
/// <param name="clientId">Client id from client detection; 0 means no client is known.</param>
private void processOS(string uaString, ref int os_id, int clientId)
{
    DataTable opSysRs = null;

    int rowid = findIdFromList(uaString, osWordDetector.findWords(uaString), osRegstringList);
    if (rowid != -1)
    {
        opSysRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_OS, rowid));
    }
    else if (clientId != 0)
    {
        opSysRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_CLIENT_OS, clientId));
    }

    // Both lookups can come back null or empty; only read the first row when one exists.
    if (opSysRs != null && opSysRs.Rows.Count > 0)
    {
        this.prepareOs(opSysRs.Rows[0], ref os_id);
    }
}
281
282
/// <summary>
/// Resolves the client: first through the crawler query on the user agent string,
/// then by matching the client regex list. When neither yields a row the user agent
/// is marked as unrecognized.
/// </summary>
/// <param name="uaString">User agent string, already escaped for use in SQL text.</param>
/// <param name="os_id">Not used by this method.</param>
/// <param name="clientId">Receives the client id; set to -1 for crawlers.</param>
/// <param name="classId">Receives the client class id; set to 99 for crawlers.</param>
private void processClient(string uaString, ref int os_id, ref int clientId, ref int classId)
{
    DataTable userAgentRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_CRAWLER, uaString));
    if (userAgentRs != null && userAgentRs.Rows.Count > 0)
    {
        this.prepareUa(userAgentRs.Rows[0], true, ref clientId, ref classId);
        // Crawlers get fixed ids regardless of what the row contained.
        classId = 99;
        clientId = -1;
        return;
    }

    int rowid = this.findIdFromList(uaString, clientWordDetector.findWords(uaString), clientRegstringList);
    if (rowid != -1)
    {
        userAgentRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_CLIENT, rowid));
        // Guard against a null/empty result instead of indexing Rows[0] blindly.
        if (userAgentRs != null && userAgentRs.Rows.Count > 0)
        {
            this.prepareUa(userAgentRs.Rows[0], false, ref clientId, ref classId);
            return;
        }
    }

    userAgent.UaClass = "Unrecognized";
    userAgent.UaClassCode = "unrecognized";
}
308
/// <summary>
/// Resolves the device class: first by matching the device regex list against the
/// user agent, otherwise (when a client class is known) through the client-class query.
/// </summary>
/// <param name="uaString">User agent string to match.</param>
/// <param name="classId">Client class id; -1 means no class is known.</param>
private void processDevice(string uaString, ref int classId)
{
    DataTable devRs = null;

    int rowid = this.findIdFromList(uaString, deviceWordDetector.findWords(uaString), deviceRegstringList);
    if (rowid != -1)
    {
        devRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_DEVICE, rowid));
    }
    else if (classId != -1)
    {
        devRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_CLIENT_CLASS, classId.ToString()));
    }

    // Both lookups can come back null or empty; only read the first row when one exists.
    if (devRs != null && devRs.Rows.Count > 0)
    {
        this.prepareDevice(devRs.Rows[0], ref classId);
    }
}
328
/// <summary>
/// Resolves the device brand and market name. Each device regex selected for the
/// detected OS is matched against the raw user agent; the first capture group is
/// looked up in the device name list, and the first hit fills the brand fields.
/// </summary>
private void processDeviceBrand()
{
    DataTable devRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_DEVICE_REGEX, this.userAgent.OsFamilyCode, this.userAgent.OsCode));
    if (devRs == null || devRs.Rows.Count == 0)
        return;

    foreach (DataRow row in devRs.Rows)
    {
        String devId = UdgerParser.ConvertToStr(row["id"]);
        String regex = UdgerParser.ConvertToStr(row["regstring"]);

        // ConvertToStr never returns null, so test for empty values instead.
        if (string.IsNullOrEmpty(devId) || string.IsNullOrEmpty(regex))
            continue;

        Regex reg = new PerlRegExpConverter(regex, "", Encoding.UTF8).Regex;

        // Match once and reuse the result instead of IsMatch followed by Match.
        Match match = reg.Match(this.ua);
        if (!match.Success)
            continue;

        // The captured text comes from the raw (untrusted) user agent and ends up in SQL
        // text, so double single quotes the same way parse() does for the full string.
        // NOTE(review): assumes SQL_DEVICE_NAME_LIST wraps {1} in single quotes - confirm.
        string deviceCode = match.Groups[1].Value.Replace("'", "''");

        DataTable devNameListRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_DEVICE_NAME_LIST, devId, deviceCode));
        if (devNameListRs == null || devNameListRs.Rows.Count == 0)
            continue;

        DataRow r = devNameListRs.Rows[0];
        userAgent.DeviceMarketname = UdgerParser.ConvertToStr(r["marketname"]);
        userAgent.DeviceBrand = UdgerParser.ConvertToStr(r["brand"]);
        userAgent.DeviceBrandCode = UdgerParser.ConvertToStr(r["brand_code"]);
        userAgent.DeviceBrandHomepage = UdgerParser.ConvertToStr(r["brand_url"]);
        userAgent.DeviceBrandIcon = UdgerParser.ConvertToStr(r["icon"]);
        userAgent.DeviceBrandIconBig = UdgerParser.ConvertToStr(r["icon_big"]);
        userAgent.DeviceBrandInfoUrl = @"https://udger.com/resources/ua-list/devices-brand-detail?brand=" + UdgerParser.ConvertToStr(r["brand_code"]);
        break; // first resolved brand wins
    }
}
366 #endregion
367
368 #region prepare data methods
369
/// <summary>
/// Copies the client columns of a result row into <c>userAgent</c>. For non-crawler
/// rows with a pattern, the first capture group of that pattern on the raw user agent
/// supplies the version fields.
/// </summary>
/// <param name="_row">Result row of the crawler or client query.</param>
/// <param name="crawler">True when the row comes from the crawler query.</param>
/// <param name="clientId">Receives the row's <c>client_id</c>.</param>
/// <param name="classId">Receives the row's <c>class_id</c>.</param>
private void prepareUa(DataRow _row, Boolean crawler, ref int clientId, ref int classId)
{
    string baseName = ConvertToStr(_row["ua"]);

    // Values straight from the row; possibly overwritten below.
    userAgent.Ua = baseName;
    userAgent.UaVersion = ConvertToStr(_row["ua_version"]);
    userAgent.UaVersionMajor = ConvertToStr(_row["ua_version_major"]);

    if (!crawler)
    {
        string pattern = ConvertToStr(_row["regstring"]);
        if (pattern != "")
        {
            Regex versionRegex = new PerlRegExpConverter(pattern, "", Encoding.UTF8).Regex;
            Match hit = versionRegex.Match(this.ua);
            if (hit.Success)
            {
                Group versionGroup = hit.Groups[1];
                if (versionGroup != null)
                {
                    string version = ConvertToStr(versionGroup);
                    userAgent.Ua = baseName + " " + version;
                    userAgent.UaVersion = version;
                    userAgent.UaVersionMajor = version.Split('.')[0];
                }
            }
        }
    }

    // Ids handed back to the caller.
    clientId = ConvertToInt(_row["client_id"]);
    classId = ConvertToInt(_row["class_id"]);

    // Crawler details.
    userAgent.CrawlerCategory = ConvertToStr(_row["crawler_category"]);
    userAgent.CrawlerCategoryCode = ConvertToStr(_row["crawler_category_code"]);
    userAgent.CrawlerLastSeen = ConvertToStr(_row["crawler_last_seen"]);
    userAgent.CrawlerRespectRobotstxt = ConvertToStr(_row["crawler_respect_robotstxt"]);

    // Client classification.
    userAgent.UaString = this.ua;
    userAgent.UaClass = ConvertToStr(_row["ua_class"]);
    userAgent.UaClassCode = ConvertToStr(_row["ua_class_code"]);
    userAgent.UaUptodateCurrentVersion = ConvertToStr(_row["ua_uptodate_current_version"]);

    // Client family and vendor.
    userAgent.UaFamily = ConvertToStr(_row["ua_family"]);
    userAgent.UaFamilyCode = ConvertToStr(_row["ua_family_code"]);
    userAgent.UaFamilyHompage = ConvertToStr(_row["ua_family_homepage"]);
    userAgent.UaFamilyVendor = ConvertToStr(_row["ua_family_vendor"]);
    userAgent.UaFamilyVendorCode = ConvertToStr(_row["ua_family_vendor_code"]);
    userAgent.UaFamilyVendorHomepage = ConvertToStr(_row["ua_family_vendor_homepage"]);
    userAgent.UaFamilyIcon = ConvertToStr(_row["ua_family_icon"]);
    userAgent.UaFamilyIconBig = ConvertToStr(_row["ua_family_icon_big"]);
    userAgent.UaFamilyInfoUrl = ConvertToStr(_row["ua_family_info_url"]);
    userAgent.UaEngine = ConvertToStr(_row["ua_engine"]);
}
/// <summary>
/// Copies the operating-system columns of a result row into <c>userAgent</c>.
/// </summary>
/// <param name="_row">Result row of an OS query.</param>
/// <param name="_osId">Currently not written by this method.</param>
private void prepareOs(DataRow _row, ref int _osId)
{
    // OS itself.
    userAgent.Os = ConvertToStr(_row["os"]);
    userAgent.OsCode = ConvertToStr(_row["os_code"]);
    userAgent.OsHomepage = ConvertToStr(_row["os_home_page"]);
    userAgent.OsIcon = ConvertToStr(_row["os_icon"]);
    userAgent.OsIconBig = ConvertToStr(_row["os_icon_big"]);
    userAgent.OsInfoUrl = ConvertToStr(_row["os_info_url"]);

    // OS family and vendor.
    userAgent.OsFamily = ConvertToStr(_row["os_family"]);
    userAgent.OsFamilyCode = ConvertToStr(_row["os_family_code"]);
    userAgent.OsFamilyVendor = ConvertToStr(_row["os_family_vendor"]);
    userAgent.OsFamilyVendorCode = ConvertToStr(_row["os_family_vendor_code"]);
    // NOTE(review): the column key is spelled "vedor"; presumably it mirrors the alias
    // used by the SQL query, so it must not be "corrected" here alone.
    userAgent.OsFamilyVendorHomepage = ConvertToStr(_row["os_family_vedor_homepage"]);
}
433
/// <summary>
/// Copies the device-class columns of a result row into <c>userAgent</c>.
/// </summary>
/// <param name="_row">Result row of a device-class query.</param>
/// <param name="_deviceClassId">Currently not written by this method.</param>
private void prepareDevice(DataRow _row, ref int _deviceClassId)
{
    userAgent.DeviceClass = ConvertToStr(_row["device_class"]);
    userAgent.DeviceClassCode = ConvertToStr(_row["device_class_code"]);

    userAgent.DeviceClassIcon = ConvertToStr(_row["device_class_icon"]);
    userAgent.DeviceClassIconBig = ConvertToStr(_row["device_class_icon_big"]);

    userAgent.DeviceClassInfoUrl = ConvertToStr(_row["device_class_info_url"]);
}
445
/// <summary>
/// Copies the IP and crawler columns of an IP-list result row into <c>ipAddress</c>.
/// </summary>
/// <param name="_row">Result row of the IP list query.</param>
private void prepareIp(DataRow _row)
{
    // Address details.
    ipAddress.IpClassification = ConvertToStr(_row["ip_classification"]);
    ipAddress.IpClassificationCode = ConvertToStr(_row["ip_classification_code"]);
    ipAddress.IpLastSeen = ConvertToStr(_row["ip_last_seen"]);
    ipAddress.IpHostname = ConvertToStr(_row["ip_hostname"]);
    ipAddress.IpCountry = ConvertToStr(_row["ip_country"]);
    ipAddress.IpCountryCode = ConvertToStr(_row["ip_country_code"]);
    ipAddress.IpCity = ConvertToStr(_row["ip_city"]);

    // Crawler identity.
    ipAddress.CrawlerName = ConvertToStr(_row["name"]);
    ipAddress.CrawlerVer = ConvertToStr(_row["ver"]);
    ipAddress.CrawlerVerMajor = ConvertToStr(_row["ver_major"]);
    ipAddress.CrawlerLastSeen = ConvertToStr(_row["last_seen"]);
    ipAddress.CrawlerRespectRobotstxt = ConvertToStr(_row["respect_robotstxt"]);
    ipAddress.CrawlerCategory = ConvertToStr(_row["crawler_classification"]);
    ipAddress.CrawlerCategoryCode = ConvertToStr(_row["crawler_classification_code"]);

    // Crawler family and vendor.
    ipAddress.CrawlerFamily = ConvertToStr(_row["family"]);
    ipAddress.CrawlerFamilyCode = ConvertToStr(_row["family_code"]);
    ipAddress.CrawlerFamilyHomepage = ConvertToStr(_row["family_homepage"]);
    ipAddress.CrawlerFamilyIcon = ConvertToStr(_row["family_icon"]);
    ipAddress.CrawlerFamilyVendor = ConvertToStr(_row["vendor"]);
    ipAddress.CrawlerFamilyVendorCode = ConvertToStr(_row["vendor_code"]);
    ipAddress.CrawlerFamilyVendorHomepage = ConvertToStr(_row["vendor_homepage"]);

    // The detail link is only built for addresses classified as crawlers.
    if (ipAddress.IpClassificationCode == "crawler")
    {
        ipAddress.CrawlerFamilyInfoUrl = "https://udger.com/resources/ua-list/bot-detail?bot=" + ConvertToStr(_row["family"]) + "#id" + ConvertToStr(_row["botid"]);
    }
}
472
/// <summary>
/// Copies the data-center columns of a result row into <c>ipAddress</c>.
/// </summary>
/// <param name="_row">Result row of the data-center range query.</param>
private void prepareIpDataCenter(DataRow _row)
{
    string name = ConvertToStr(_row["name"]);
    string nameCode = ConvertToStr(_row["name_code"]);
    string homepage = ConvertToStr(_row["homepage"]);

    ipAddress.DatacenterName = name;
    ipAddress.DatacenterNameCode = nameCode;
    ipAddress.DatacenterHomepage = homepage;
}
479 #endregion
480
/// <summary>
/// Converts a value to its string form, mapping null and <see cref="DBNull"/> to "".
/// </summary>
/// <param name="value">Value to convert; may be null or DBNull.</param>
/// <returns>"" for null/DBNull, otherwise <c>value.ToString()</c>.</returns>
private static string ConvertToStr(object value)
{
    bool missing = value == null || value is DBNull;
    return missing ? "" : value.ToString();
}
487
/// <summary>
/// Converts a value to an int, mapping null and <see cref="DBNull"/> to 0.
/// </summary>
/// <param name="value">Value to convert; may be null or DBNull.</param>
/// <returns>0 for null/DBNull, otherwise <c>Convert.ToInt32(value)</c>.</returns>
private static int ConvertToInt(object value)
{
    bool missing = value == null || value is DBNull;
    return missing ? 0 : Convert.ToInt32(value);
}
/// <summary>
/// Parses a date/time string without throwing.
/// </summary>
/// <param name="value">Text to parse.</param>
/// <returns>The parsed value, or <see cref="DateTime.MinValue"/> when parsing fails.</returns>
private static DateTime ConvertToDateTime(string value)
{
    DateTime parsed;
    if (!DateTime.TryParse(value, out parsed))
    {
        // TryParse leaves DateTime.MinValue in the out argument on failure.
        return DateTime.MinValue;
    }
    return parsed;
}
501
502
/// <summary>
/// Determines whether a string is an IPv4 or IPv6 address.
/// </summary>
/// <param name="_ip">Text to examine.</param>
/// <param name="_retIp">
/// Receives the parsed address rendered back to text, or "" when parsing fails.
/// </param>
/// <returns>4 for IPv4, 6 for IPv6, 0 otherwise.</returns>
private int getIPAddressVersion(string _ip, out string _retIp)
{
    _retIp = "";

    System.Net.IPAddress parsed;
    if (!System.Net.IPAddress.TryParse(_ip, out parsed))
    {
        return 0;
    }

    _retIp = parsed.ToString();

    switch (parsed.AddressFamily)
    {
        case System.Net.Sockets.AddressFamily.InterNetwork:
            return 4;
        case System.Net.Sockets.AddressFamily.InterNetworkV6:
            return 6;
        default:
            return 0;
    }
}
519
/// <summary>
/// Converts a dotted IPv4 address to its numeric value, with the first octet as the
/// most significant byte (e.g. "1.2.3.4" gives 16909060).
/// </summary>
/// <param name="addr">IPv4 address text.</param>
/// <returns>The address as a non-negative number in the range 0..4294967295.</returns>
/// <exception cref="FormatException"><paramref name="addr"/> is not a valid IP address.</exception>
/// <exception cref="ArgumentException"><paramref name="addr"/> is not an IPv4 address.</exception>
private long AddrToInt(string addr)
{
    // GetAddressBytes returns the octets in network order, which avoids the obsolete
    // IPAddress.Address property and the narrowing casts that depended on unchecked
    // arithmetic.
    byte[] octets = System.Net.IPAddress.Parse(addr).GetAddressBytes();
    if (octets.Length != 4)
        throw new ArgumentException("An IPv4 address is required", "addr");

    return ((long)octets[0] << 24)
         | ((long)octets[1] << 16)
         | ((long)octets[2] << 8)
         | (long)octets[3];
}
526
/// <summary>
/// Builds the shared regex lists and word detectors on first use. Later calls return
/// immediately once <c>clientRegstringList</c> has been set.
/// </summary>
/// <param name="connection">Data reader used to query the regex and word tables.</param>
[MethodImpl(MethodImplOptions.Synchronized)]
private static void initStaticStructures(DataReader connection)
{
    if (clientRegstringList != null)
        return;

    // Build everything into locals first: if any query throws, no static field has been
    // touched and the next call retries from scratch instead of seeing a half-built state.
    List<IdRegString> clientList = prepareRegexpStruct(connection, "udger_client_regex");
    List<IdRegString> osList = prepareRegexpStruct(connection, "udger_os_regex");
    List<IdRegString> deviceList = prepareRegexpStruct(connection, "udger_deviceclass_regex");

    WordDetector clientWords = createWordDetector(connection, "udger_client_regex", "udger_client_regex_words");
    WordDetector deviceWords = createWordDetector(connection, "udger_deviceclass_regex", "udger_deviceclass_regex_words");
    WordDetector osWords = createWordDetector(connection, "udger_os_regex", "udger_os_regex_words");

    osRegstringList = osList;
    deviceRegstringList = deviceList;
    clientWordDetector = clientWords;
    deviceWordDetector = deviceWords;
    osWordDetector = osWords;

    // Assigned last because it is the field the guard above tests.
    clientRegstringList = clientList;
}
541
/// <summary>
/// Builds a word detector holding every word of <paramref name="wordTableName"/> whose
/// id is referenced by the <c>word_id</c> or <c>word2_id</c> column of
/// <paramref name="regexTableName"/>.
/// </summary>
/// <param name="connection">Data reader used for the queries.</param>
/// <param name="regexTableName">Regex table whose word references are collected.</param>
/// <param name="wordTableName">Table that maps word ids to words.</param>
/// <returns>The populated detector; empty when the word query returns null.</returns>
private static WordDetector createWordDetector(DataReader connection, String regexTableName, String wordTableName)
{
    // Ids referenced by either word column of the regex table.
    HashSet<int> referencedIds = new HashSet<int>();
    addUsedWords(referencedIds, connection, regexTableName, "word_id");
    addUsedWords(referencedIds, connection, regexTableName, "word2_id");

    WordDetector detector = new WordDetector();

    DataTable wordRows = connection.selectQuery("SELECT * FROM " + wordTableName);
    if (wordRows == null)
    {
        return detector;
    }

    foreach (DataRow wordRow in wordRows.Rows)
    {
        int wordId = ConvertToInt(wordRow["id"]);
        if (!referencedIds.Contains(wordId))
        {
            continue; // word is not used by any regex
        }

        // Words are stored lower-cased.
        detector.addWord(wordId, ConvertToStr(wordRow["word"]).ToLower());
    }

    return detector;
}
567
/// <summary>
/// Adds every value of one word-id column of a regex table to <paramref name="usedWords"/>.
/// </summary>
/// <param name="usedWords">Set that receives the ids; null/DBNull cells are added as 0.</param>
/// <param name="connection">Data reader used for the query.</param>
/// <param name="regexTableName">Table to read from.</param>
/// <param name="wordIdColumn">Column that holds the word ids.</param>
private static void addUsedWords(HashSet<int> usedWords, DataReader connection, String regexTableName, String wordIdColumn)
{
    string query = "SELECT " + wordIdColumn + " FROM " + regexTableName;
    DataTable idRows = connection.selectQuery(query);
    if (idRows == null)
    {
        return;
    }

    foreach (DataRow idRow in idRows.Rows)
    {
        int wordId = ConvertToInt(idRow[wordIdColumn]);
        usedWords.Add(wordId);
    }
}
579
/// <summary>
/// Returns the id of the first list entry whose required words were all found and whose
/// pattern matches the user agent.
/// </summary>
/// <param name="uaString">User agent string to match.</param>
/// <param name="foundClientWords">Ids of the words detected in the user agent.</param>
/// <param name="list">Candidate entries, tried in list order.</param>
/// <returns>The id of the first matching entry, or -1 when none matches.</returns>
private int findIdFromList(String uaString, HashSet<int> foundClientWords, List<IdRegString> list)
{
    foreach (IdRegString irs in list)
    {
        // A word id of 0 means the entry does not require that word.
        bool firstWordOk = irs.wordId1 == 0 || foundClientWords.Contains(irs.wordId1);
        bool secondWordOk = irs.wordId2 == 0 || foundClientWords.Contains(irs.wordId2);
        if (!firstWordOk || !secondWordOk)
            continue;

        // Converting the Perl-style pattern is the expensive step, so each pattern is
        // converted once per parser instance and reused from regexCache afterwards.
        // regexCache is a plain Dictionary; this assumes one parser instance is not used
        // from several threads at once.
        Regex searchTerm;
        if (!regexCache.TryGetValue(irs.pattern, out searchTerm))
        {
            searchTerm = new PerlRegExpConverter(irs.pattern, "", Encoding.UTF8).Regex;
            regexCache[irs.pattern] = searchTerm;
        }

        if (searchTerm.IsMatch(uaString))
            return irs.id;
    }
    return -1;
}
601
/// <summary>
/// Loads a regex table, ordered by its <c>sequence</c> column, into a list of
/// <c>IdRegString</c> entries.
/// </summary>
/// <param name="connection">Data reader used for the query.</param>
/// <param name="regexpTableName">Table to read from.</param>
/// <returns>The entries in query order; empty when the query returns null.</returns>
private static List<IdRegString> prepareRegexpStruct(DataReader connection, String regexpTableName)
{
    List<IdRegString> ret = new List<IdRegString>();
    DataTable rs = connection.selectQuery("SELECT rowid, regstring, word_id, word2_id FROM " + regexpTableName + " ORDER BY sequence");
    if (rs == null)
        return ret;

    // Built once for all rows instead of once per row.
    Regex delimited = new Regex(@"^/?(.*?)/si$");

    foreach (DataRow row in rs.Rows)
    {
        IdRegString irs = new IdRegString();
        irs.id = UdgerParser.ConvertToInt(row["rowid"]);
        irs.wordId1 = UdgerParser.ConvertToInt(row["word_id"]);
        irs.wordId2 = UdgerParser.ConvertToInt(row["word2_id"]);

        String regex = UdgerParser.ConvertToStr(row["regstring"]);
        Match match = delimited.Match(regex);
        if (match.Success)
        {
            // Groups[0] is the whole match, so the delimiters and the "si" flags stay in
            // the stored text; findIdFromList hands it to PerlRegExpConverter as-is.
            regex = match.Groups[0].ToString();
        }
        irs.pattern = regex;
        ret.Add(irs);
    }
    return ret;
}
627
628 #endregion
629 }
630}
void SetDataDir(string dataDir, string fileName)
Set the data directory and DB filename
Definition: UdgerParser.cs:125
UdgerParser()
Constructor
Definition: UdgerParser.cs:64
void SetDataDir(string dataDir)
Set the data directory
Definition: UdgerParser.cs:109
void parse()
Parse the user agent string and/or IP address
Definition: UdgerParser.cs:142
UdgerParser(int LRUCashCapacity=10000)
Constructor
Definition: UdgerParser.cs:77
UdgerParser(bool useLRUCash=true, int LRUCashCapacity=10000)
Constructor
Definition: UdgerParser.cs:91