UdgerParser.cs
1/*
2 UdgerParser - Local parser lib
3
4 UdgerParser class parses useragent strings based on a database downloaded from udger.com
5
6
7 author The Udger.com Team (info@udger.com)
8 copyright Copyright (c) Udger s.r.o.
9 license GNU Lesser General Public License
10 link https://udger.com/products/local_parser
11
12 Third Party lib:
13 ADO.NET Data Provider for SQLite - http://www.sqlite.org/ - Public domain
14 RegExpPerl.cs - https://github.com/DEVSENSE/Phalanger/blob/master/Source/ClassLibrary/RegExpPerl.cs - Apache License Version 2.0
15 */
16
17using System;
18using System.Text;
19using System.Data;
20using System.IO;
21using System.Collections.Generic;
22using System.Runtime.CompilerServices;
23using System.Text.RegularExpressions;
24using Udger.Parser.Input;
25using System.Linq;
26
27namespace Udger.Parser
28{
29 public class UdgerParser
30 {
31
/// <summary>User-agent result of the most recent <c>parse()</c> call; <c>parse()</c> assigns a new (or cached) instance each time.</summary>
32 public UserAgent userAgent { get; private set; }
/// <summary>IP-address result of the most recent <c>parse()</c> call; <c>parse()</c> assigns a new instance each time.</summary>
33 public IPAddress ipAddress { get; private set; }
34
/// <summary>IP address to look up on the next <c>parse()</c> call; <c>parse()</c> resets it to "" after the lookup.</summary>
35 public string ip { get; set; }
/// <summary>User-agent string to analyse on the next <c>parse()</c> call; also overwritten by <c>setHeader</c> with the User-Agent header value.</summary>
36 public string ua { get; set; }
/// <summary>Client-hint header values to analyse on the next <c>parse()</c> call; <c>parse()</c> replaces it with a new <c>Header</c> when it finishes.</summary>
37 public Header header { get; set; }
38
39 #region private Variables
// One row of a *_regex table as loaded by prepareRegexpStruct.
40 private struct IdRegString
41 {
// rowid of the regex row; returned by findIdFromList on a match.
42 public int id;
// word_id column; findIdFromList treats 0 as "no word required".
43 public int wordId1;
// word2_id column; findIdFromList treats 0 as "no word required".
44 public int wordId2;
// regstring column text, later handed to PerlRegExpConverter.
45 public string pattern;
46 }
47
// Parsed results keyed by user-agent string; only created/consulted when useCache is true.
48 private LRUCache<string, UserAgent> cache;
// Parsed results keyed by header.cacheCode(); only created/consulted when useCache is true.
49 private LRUCache<string, UserAgent> headerCache;
// Set by the constructors; gates every cache read and write.
50 private bool useCache;
// Database access object; every query in this class goes through dt.selectQuery.
51 private DataReader dt;
// Word detectors and regex lists below are static and filled once by initStaticStructures.
52 private static WordDetector clientWordDetector;
53 private static WordDetector deviceWordDetector;
54 private static WordDetector osWordDetector;
55
56 private static List<IdRegString> clientRegstringList;
57 private static List<IdRegString> osRegstringList;
58 private static List<IdRegString> deviceRegstringList;
59
// Per-instance map from a string key to a compiled Regex (by its name, a regex cache).
60 private readonly Dictionary<string, Regex> regexCache = new Dictionary<string, Regex>();
// Per-instance string-to-string map; NOTE(review): presumably for prepared statement text - confirm usage.
61 private readonly Dictionary<string, string> preparedStmtMap = new Dictionary<string, string>();
62 #endregion
63
64 #region Constructor
/// <summary>
/// Creates a parser with result caching enabled and a capacity of 10000
/// entries for each of the two LRU caches.
/// </summary>
public UdgerParser()
    : this(10000)
{
}
/// <summary>
/// Creates a parser with result caching enabled.
/// </summary>
/// <param name="LRUCashCapacity">Capacity passed to both LRU caches.</param>
public UdgerParser(int LRUCashCapacity)
    : this(true, LRUCashCapacity)
{
}
/// <summary>
/// Creates a parser and decides whether parsed results are cached.
/// </summary>
/// <param name="useLRUCash">When false no caches are created and every parse hits the database.</param>
/// <param name="LRUCashCapacity">Capacity passed to both LRU caches (ignored when caching is off).</param>
public UdgerParser(bool useLRUCash, int LRUCashCapacity)
{
    dt = new DataReader();
    header = new Header();
    ua = "";
    ip = "";
    useCache = useLRUCash;

    if (!useLRUCash)
    {
        // Caches stay null; every use of them is guarded by useCache.
        return;
    }

    cache = new LRUCache<string, UserAgent>(LRUCashCapacity);
    headerCache = new LRUCache<string, UserAgent>(LRUCashCapacity);
}
114 #endregion
115
116 #region setParser method
/// <summary>
/// Sets the data directory and selects the default database file udgerdb_v4.dat inside it.
/// </summary>
/// <param name="dataDir">Existing directory that contains the database file.</param>
/// <exception cref="DirectoryNotFoundException">The directory does not exist.</exception>
/// <exception cref="FileNotFoundException">The database file is missing from the directory.</exception>
public void SetDataDir(string dataDir)
{
    const string defaultFileName = "udgerdb_v4.dat";

    if (!Directory.Exists(dataDir))
        throw new DirectoryNotFoundException("Data dir not found");

    dt.data_dir = dataDir;
    // Path.Combine instead of a literal '\' so the path is also valid on non-Windows hosts.
    dt.DataSourcePath = Path.Combine(dataDir, defaultFileName);

    if (!File.Exists(dt.DataSourcePath))
    {
        // The message now names the file that is actually looked up (it used to say v3).
        throw new FileNotFoundException("Data file " + defaultFileName + " not found", dt.DataSourcePath);
    }
}
/// <summary>
/// Sets the data directory and the name of the database file inside it.
/// </summary>
/// <param name="dataDir">Existing directory that contains the database file.</param>
/// <param name="fileName">File name of the database, relative to <paramref name="dataDir"/>.</param>
/// <exception cref="DirectoryNotFoundException">The directory does not exist.</exception>
/// <exception cref="ArgumentException"><paramref name="fileName"/> is null or empty.</exception>
/// <exception cref="FileNotFoundException">The database file is missing from the directory.</exception>
public void SetDataDir(string dataDir, string fileName)
{
    if (!Directory.Exists(dataDir))
        throw new DirectoryNotFoundException("Data dir not found");

    if (string.IsNullOrEmpty(fileName))
        throw new ArgumentException("Data file name must not be empty", nameof(fileName));

    dt.data_dir = dataDir;
    // Path.Combine instead of a literal '\' so the path is also valid on non-Windows hosts.
    dt.DataSourcePath = Path.Combine(dataDir, fileName);

    if (!File.Exists(dt.DataSourcePath))
        throw new FileNotFoundException("Data file " + fileName + " not found", dt.DataSourcePath);
}
148
/// <summary>
/// Reads the client-hint and User-Agent fields out of a raw block of HTTP header
/// lines ("Name: value", one per line) and stores them in <c>header</c>; the
/// User-Agent value is also copied to <c>ua</c>.
/// </summary>
/// <param name="headerStr">Header lines separated by CR, LF or CRLF. Null is treated as empty.</param>
/// <remarks>
/// Header names are matched case-insensitively. Lines without a colon (for example
/// a request line) are skipped instead of raising an exception, and when a header is
/// repeated the first occurrence wins. Missing headers yield "".
/// </remarks>
public void setHeader(string headerStr)
{
    var fields = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

    // Split on either line terminator so CRLF input works on every platform.
    string[] lines = (headerStr ?? "").Split(new[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
    foreach (string line in lines)
    {
        // Only the first colon separates name from value; the value may contain more.
        int colon = line.IndexOf(':');
        if (colon <= 0)
            continue;

        string key = line.Substring(0, colon).Trim();
        if (key.Length == 0 || fields.ContainsKey(key))
            continue;

        fields[key] = line.Substring(colon + 1).Trim();
    }

    if (header == null)
        header = new Header();

    header.SecChUa = headerValueOrEmpty(fields, "Sec-Ch-Ua");
    header.SecChUaFullVersionList = headerValueOrEmpty(fields, "sec-ch-ua-full-version-list");
    header.SecChUaMobile = headerValueOrEmpty(fields, "sec-ch-ua-mobile");
    header.SecChUaFullVersion = headerValueOrEmpty(fields, "sec-ch-ua-full-version");
    header.SecChUaPlatform = headerValueOrEmpty(fields, "sec-ch-ua-platform");
    header.SecChUaPlatformVersion = headerValueOrEmpty(fields, "sec-ch-ua-platform-version");
    header.SecChUaModel = headerValueOrEmpty(fields, "sec-ch-ua-model");
    header.Ua = headerValueOrEmpty(fields, "user-agent");
    this.ua = header.Ua;
}

// Returns the value stored for a header name, or "" when the header was not supplied.
private static string headerValueOrEmpty(Dictionary<string, string> fields, string name)
{
    string value;
    return fields.TryGetValue(name, out value) ? value : "";
}
165 #endregion
166
167 #region public method
/// <summary>
/// Parses whatever input has been set (<c>ua</c>, <c>header</c>, <c>ip</c>) and
/// publishes the results through <c>userAgent</c> and <c>ipAddress</c>.
/// </summary>
/// <remarks>
/// Both result objects are replaced on every call. Consumed inputs are reset:
/// <c>ua</c> and <c>ip</c> become "" and <c>header</c> becomes a new <c>Header</c>.
/// Nothing is parsed when the database connection cannot be established.
/// </remarks>
public void parse()
{
    this.ipAddress = new IPAddress();
    this.userAgent = new UserAgent();
    UserAgent cached;

    dt.connect(this);
    if (dt.Connected)
    {
        // Load the shared lookup tables only once a connection exists, so a failed
        // first connect is not remembered as "already initialised".
        UdgerParser.initStaticStructures(dt);

        if (!string.IsNullOrEmpty(this.ua))
        {
            if (useCache && cache.TryGetValue(this.ua, out cached))
            {
                // NOTE(review): the cached instance itself is published; if header
                // parsing runs next it writes into that same object - confirm whether
                // UserAgent should be copied here.
                userAgent = cached;
            }
            else
            {
                this.parseUA(this.ua.Replace("'", "''"));
            }

            // Reset on both paths so a cache hit does not leave stale input behind.
            this.ua = "";
        }

        if (this.isHeaderSetted())
        {
            if (useCache && headerCache.TryGetValue(this.header.cacheCode(), out cached))
                userAgent = cached;
            else
                this.parseHeader();
        }

        if (!string.IsNullOrEmpty(this.ip))
        {
            this.parseIP(this.ip.Replace("'", "''"));
            this.ip = "";
        }
    }

    header = new Header();
}
209 #endregion
210
211 #region private method
212
213 #region parse
/// <summary>
/// Fills <c>userAgent</c> from a user-agent string: client first, then either the
/// crawler device class or the OS and device class, then the device brand.
/// </summary>
/// <param name="_userAgent">User-agent text as passed by <c>parse()</c> (single quotes doubled for SQL).</param>
private void parseUA(string _userAgent)
{
    if (string.IsNullOrEmpty(_userAgent))
        return;

    int client_id = 0;
    int client_class_id = -1;
    int os_id = 0;

    userAgent.UaString = this.ua;
    userAgent.UaClass = "Unrecognized";
    userAgent.UaClassCode = "unrecognized";

    if (!dt.Connected)
        return;

    // processClient returns true when the string belongs to a known crawler.
    bool isCrawler = this.processClient(_userAgent, ref os_id, ref client_id, ref client_class_id);
    if (isCrawler)
    {
        this.proicessUnrecognizedDevice();
    }
    else
    {
        this.processOS(_userAgent, ref os_id, client_id);
        this.processDevice(_userAgent, ref client_class_id);
    }

    if (!string.IsNullOrEmpty(userAgent.OsFamilyCode))
        this.processDeviceBrand();

    // Store under the raw string: parse() looks the cache up with this.ua, so the
    // escaped variant would never be found for strings containing a quote.
    if (this.useCache)
        cache.Set(this.ua, this.userAgent);
}
256
/// <summary>
/// Applies the client-hint headers to <c>userAgent</c> (unless it is already
/// classified as a crawler) and stores the result in the header cache.
/// </summary>
private void parseHeader()
{
    int clientId = 0;
    int classId = -1;
    int osId = 0;

    // Taken before processSecCH runs because that call rewrites header.SecChUaMobile.
    string cacheKey = this.useCache ? this.header.cacheCode() : "";

    bool alreadyCrawler = userAgent.UaClassCode == "crawler";
    if (!alreadyCrawler)
        this.processSecCH(ref clientId, ref classId, ref osId);

    if (this.useCache && cacheKey != "")
        headerCache.Set(cacheKey, this.userAgent);
}
/// <summary>
/// Fills <c>ipAddress</c> for the current <c>ip</c>: classification/crawler data from
/// the IP list and, for IPv4 only, the datacenter the address belongs to.
/// </summary>
/// <param name="_ip">IP text as passed by <c>parse()</c>; replaced by the normalised form once parsed.</param>
private void parseIP(string _ip)
{
    if (string.IsNullOrEmpty(_ip))
        return;

    ipAddress.Ip = this.ip;

    if (!dt.Connected)
        return;

    string normalisedIp;
    int ipVer = this.getIPAddressVersion(ip, out normalisedIp);
    if (ipVer == 0)
        return; // not a valid IPv4/IPv6 address

    if (normalisedIp != "")
        _ip = normalisedIp;

    ipAddress.IpVer = UdgerParser.ConvertToStr(ipVer);

    // The address is compared as a single-quoted SQL string literal; a double-quoted
    // token is an identifier in SQL and only works through a SQLite legacy fallback.
    string ipQuery =
        "SELECT udger_crawler_list.id as botid,ip_last_seen,ip_hostname,ip_country,ip_city,ip_country_code,ip_classification,ip_classification_code," +
        " name,ver,ver_major,last_seen,respect_robotstxt,family,family_code,family_homepage,family_icon,vendor,vendor_code,vendor_homepage,crawler_classification,crawler_classification_code" +
        " FROM udger_ip_list" +
        " JOIN udger_ip_class ON udger_ip_class.id=udger_ip_list.class_id" +
        " LEFT JOIN udger_crawler_list ON udger_crawler_list.id=udger_ip_list.crawler_id" +
        " LEFT JOIN udger_crawler_class ON udger_crawler_class.id=udger_crawler_list.class_id" +
        " WHERE ip='" + _ip + "' ORDER BY sequence";

    DataTable ipTable = dt.selectQuery(ipQuery);
    if (ipTable != null && ipTable.Rows.Count > 0)
        this.prepareIp(ipTable.Rows[0]);

    if (ipVer != 4)
        return; // datacenter ranges are keyed by a numeric IPv4 value

    long ipLong = this.AddrToInt(_ip);
    string rangeQuery =
        "select name, name_code, homepage" +
        " FROM udger_datacenter_range" +
        " JOIN udger_datacenter_list ON udger_datacenter_range.datacenter_id = udger_datacenter_list.id" +
        " where iplong_from <= " + ipLong.ToString() + " AND iplong_to >=" + ipLong.ToString();

    DataTable dataCenter = dt.selectQuery(rangeQuery);
    if (dataCenter != null && dataCenter.Rows.Count > 0)
        this.prepareIpDataCenter(dataCenter.Rows[0]);
}
326 #endregion
327
328 #region process methods
329
/// <summary>
/// Resolves the operating system: by OS regex match on the user-agent string, or,
/// when none matches and a client is known, by the OS linked to that client.
/// </summary>
/// <param name="uaString">User-agent text to match.</param>
/// <param name="os_id">Passed through to <c>prepareOs</c>.</param>
/// <param name="clientId">Client id from client detection; 0 means no client.</param>
private void processOS(string uaString, ref int os_id, int clientId)
{
    DataTable opSysRs = null;

    int rowid = findIdFromList(uaString, osWordDetector.findWords(uaString), osRegstringList);
    if (rowid != -1)
        opSysRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_OS, rowid));
    else if (clientId != 0)
        opSysRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_CLIENT_OS, clientId));

    // Both lookups are guarded alike; an empty result leaves the OS fields untouched.
    if (opSysRs != null && opSysRs.Rows.Count > 0)
        this.prepareOs(opSysRs.Rows[0], ref os_id);
}
350
351
/// <summary>
/// Identifies the client: a crawler lookup by exact string first, then a regex
/// match against the client list.
/// </summary>
/// <param name="uaString">User-agent text (already escaped by the caller for SQL use).</param>
/// <param name="os_id">Unused by this method; kept for the existing call shape.</param>
/// <param name="clientId">Receives the client id; -1 for a crawler.</param>
/// <param name="classId">Receives the class id; 99 for a crawler.</param>
/// <returns>True when the string belongs to a known crawler.</returns>
private bool processClient(string uaString, ref int os_id, ref int clientId, ref int classId)
{
    DataTable crawlerRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_CRAWLER, uaString));
    if (crawlerRs != null && crawlerRs.Rows.Count > 0)
    {
        this.prepareUa(crawlerRs.Rows[0], true, ref clientId, ref classId);
        classId = 99;
        clientId = -1;
        return true;
    }

    DataTable clientRs = null;
    int rowid = this.findIdFromList(uaString, clientWordDetector.findWords(uaString), clientRegstringList);
    if (rowid != -1)
        clientRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_CLIENT, rowid));

    if (clientRs != null && clientRs.Rows.Count > 0)
    {
        this.prepareUa(clientRs.Rows[0], false, ref clientId, ref classId);
    }
    else
    {
        // No regex hit, or the matched row could not be loaded.
        userAgent.UaClass = "Unrecognized";
        userAgent.UaClassCode = "unrecognized";
    }

    return false;
}
382
383
/// <summary>
/// Resolves the device class: by device regex match on the user-agent string, or
/// otherwise from the client class (class 1 when no client class is known).
/// </summary>
/// <param name="uaString">User-agent text to match.</param>
/// <param name="classId">Client class id; -1 means unknown.</param>
private void processDevice(string uaString, ref int classId)
{
    DataTable devRs;

    int rowid = this.findIdFromList(uaString, deviceWordDetector.findWords(uaString), deviceRegstringList);
    if (rowid != -1)
    {
        devRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_DEVICE, rowid));
    }
    else
    {
        // Same argument types as before: the known class as text, the default as int 1.
        object classArg = classId != -1 ? (object)classId.ToString() : 1;
        devRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_CLIENT_CLASS, classArg));
    }

    if (devRs != null && devRs.Rows.Count > 0)
        this.prepareDevice(devRs.Rows[0], ref classId);
}
413
/// <summary>
/// Assigns the device class with id 1 from the device class list; used by
/// <c>parseUA</c> for crawlers.
/// </summary>
private void proicessUnrecognizedDevice()
{
    DataTable devRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_DEVICECLASSLIST_CH, 1));

    // Check the table before indexing; Rows[0] throws on an empty result.
    if (devRs != null && devRs.Rows.Count > 0)
        this.prepareUnrecoginzedDevice(devRs.Rows[0]);
}
424
/// <summary>
/// Refines <c>userAgent</c> from the Sec-CH-UA client-hint headers: copies the raw
/// hints, then resolves client, operating system, device name/brand and device class.
/// </summary>
/// <param name="clientId">Receives the matched client id, if any.</param>
/// <param name="classId">Receives the matched client class id, if any.</param>
/// <param name="osid">Receives the matched OS id, if any.</param>
/// <remarks>
/// Rewrites <c>header.SecChUaMobile</c> to "0" or "1" as a side effect.
/// </remarks>
private void processSecCH(ref int clientId, ref int classId, ref int osid)
{
    this.userAgent.SecChUa = header.SecChUa;
    this.userAgent.SecChUaFullVersion = header.SecChUaFullVersion?.Trim('"');
    this.userAgent.SecChUaFullVersionList = header.SecChUaFullVersionList;
    this.userAgent.SecChUaModel = header.SecChUaModel;
    this.userAgent.SecChUaPlatform = header.SecChUaPlatform?.Trim('"');
    this.userAgent.SecChUaPlatformVersion = header.SecChUaPlatformVersion?.Trim('"');

    // Normalise the mobile hint: absent -> result keeps the absent value, "?0" -> "0",
    // anything else -> "1". The header copy always ends up "0" or "1" for the queries.
    if (String.IsNullOrEmpty(this.header.SecChUaMobile))
    {
        this.userAgent.SecChUaMobile = header.SecChUaMobile;
        this.header.SecChUaMobile = "0";
    }
    else if (this.header.SecChUaMobile == "?0")
    {
        this.userAgent.SecChUaMobile = "0";
        this.header.SecChUaMobile = "0";
    }
    else
    {
        this.header.SecChUaMobile = "1";
        this.userAgent.SecChUaMobile = "1";
    }

    // ---- client ----
    bool hasFullVersionList = !String.IsNullOrEmpty(this.header.SecChUaFullVersionList);
    if (hasFullVersionList || !String.IsNullOrEmpty(this.header.SecChUa))
    {
        string regstringSearch = hasFullVersionList ? this.header.SecChUaFullVersionList : this.header.SecChUa;
        DataTable uaRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_CLIENT_CH_REGEX, this.header.SecChUaMobile));

        if (uaRs != null)
        {
            foreach (DataRow row in uaRs.Rows)
            {
                String regex = UdgerParser.ConvertToStr(row["regstring"]);
                Match match = new PerlRegExpConverter(regex, "", Encoding.UTF8).Regex.Match(regstringSearch);
                if (!match.Success)
                    continue;

                string ver = match.Groups[1].ToString();
                string versionMajor;
                if (hasFullVersionList)
                {
                    versionMajor = ver.Split('.')[0];
                }
                else
                {
                    // Sec-CH-UA carries only the major version; prefer the full-version hint if sent.
                    versionMajor = ver;
                    ver = String.IsNullOrEmpty(header.SecChUaFullVersion) ? ver : header.SecChUaFullVersion.Trim('"');
                }

                clientId = UdgerParser.ConvertToInt(row["client_id"]);
                classId = UdgerParser.ConvertToInt(row["class_id"]);
                userAgent.UaString = header.Ua;
                userAgent.UaClass = UdgerParser.ConvertToStr(row["client_classification"]);
                userAgent.UaClassCode = UdgerParser.ConvertToStr(row["client_classification_code"]);
                userAgent.Ua = $"{UdgerParser.ConvertToStr(row["name"])} {ver}";
                userAgent.UaVersion = ver;
                userAgent.UaVersionMajor = versionMajor;
                userAgent.UaUptodateCurrentVersion = UdgerParser.ConvertToStr(row["uptodate_current_version"]);
                userAgent.UaFamily = UdgerParser.ConvertToStr(row["name"]);
                userAgent.UaFamilyCode = UdgerParser.ConvertToStr(row["name_code"]);
                userAgent.UaFamilyHompage = UdgerParser.ConvertToStr(row["homepage"]);
                userAgent.UaFamilyVendor = UdgerParser.ConvertToStr(row["vendor"]);
                userAgent.UaFamilyVendorCode = UdgerParser.ConvertToStr(row["vendor_code"]);
                userAgent.UaFamilyVendorHomepage = UdgerParser.ConvertToStr(row["vendor_homepage"]);
                userAgent.UaFamilyIcon = UdgerParser.ConvertToStr(row["icon"]);
                userAgent.UaFamilyIconBig = UdgerParser.ConvertToStr(row["icon_big"]);
                userAgent.UaFamilyInfoUrl = @"https://udger.com/resources/ua-list/browser-detail?browser=" + UdgerParser.ConvertToStr(row["name"]);
                userAgent.UaEngine = UdgerParser.ConvertToStr(row["engine"]);
                break;
            }
        }
    }

    // ---- operating system ----
    // Header text is client-controlled, so quotes are doubled before it is placed in SQL.
    // NOTE(review): assumes the query templates wrap the value in single quotes, as the
    // escaping applied in parse() does - confirm against UdgerSqlQuery.
    string platformVersion = String.IsNullOrEmpty(this.header.SecChUaPlatformVersion) ? "" : this.header.SecChUaPlatformVersion.Trim('"');
    DataTable osRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_OS_CH_REGEX, escapeSqlLiteral(platformVersion)));
    if (osRs != null && !String.IsNullOrEmpty(this.header.SecChUaPlatform))
    {
        foreach (DataRow row in osRs.Rows)
        {
            String regex = UdgerParser.ConvertToStr(row["regstring"]);
            Regex reg = new PerlRegExpConverter(regex, "", Encoding.UTF8).Regex;
            if (!reg.IsMatch(this.header.SecChUaPlatform))
                continue;

            osid = UdgerParser.ConvertToInt(row["os_id"]);
            userAgent.Os = UdgerParser.ConvertToStr(row["name"]);
            userAgent.OsCode = UdgerParser.ConvertToStr(row["name_code"]);
            userAgent.OsHomepage = UdgerParser.ConvertToStr(row["homepage"]);
            userAgent.OsIcon = UdgerParser.ConvertToStr(row["icon"]);
            userAgent.OsIconBig = UdgerParser.ConvertToStr(row["icon_big"]);
            userAgent.OsInfoUrl = @"https://udger.com/resources/ua-list/os-detail?os=" + UdgerParser.ConvertToStr(row["name"]);
            userAgent.OsFamily = UdgerParser.ConvertToStr(row["family"]);
            userAgent.OsFamilyCode = UdgerParser.ConvertToStr(row["family_code"]);
            userAgent.OsFamilyVendor = UdgerParser.ConvertToStr(row["vendor"]);
            userAgent.OsFamilyVendorCode = UdgerParser.ConvertToStr(row["vendor_code"]);
            userAgent.OsFamilyVendorHomepage = UdgerParser.ConvertToStr(row["vendor_homepage"]);
            break;
        }
    }

    // ---- device name / brand (needs both the model hint and a resolved OS family) ----
    if (!String.IsNullOrEmpty(userAgent.SecChUaModel) && !String.IsNullOrEmpty(userAgent.OsFamilyCode))
    {
        DataTable devRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_DEVICENAME_CH_REGEX, userAgent.OsFamilyCode, userAgent.OsFamilyCode, userAgent.OsCode));
        if (devRs != null && devRs.Rows.Count == 1)
        {
            DataRow row = devRs.Rows[0];
            string model = escapeSqlLiteral(userAgent.SecChUaModel.Trim('"'));
            DataTable nameRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_DEVICENAMELIST_CH, row["id"], model));

            if (nameRs != null && nameRs.Rows.Count >= 1)
            {
                DataRow rowc = nameRs.Rows[0];

                userAgent.DeviceMarketname = UdgerParser.ConvertToStr(rowc["marketname"]);
                userAgent.DeviceBrand = UdgerParser.ConvertToStr(rowc["brand"]);
                userAgent.DeviceBrandCode = UdgerParser.ConvertToStr(rowc["brand_code"]);
                userAgent.DeviceBrandHomepage = UdgerParser.ConvertToStr(rowc["brand_url"]);
                userAgent.DeviceBrandIcon = UdgerParser.ConvertToStr(rowc["icon"]);
                userAgent.DeviceBrandIconBig = UdgerParser.ConvertToStr(rowc["icon_big"]);
                userAgent.DeviceBrandInfoUrl = @"https://udger.com/resources/ua-list/devices-brand-detail?brand=" + UdgerParser.ConvertToStr(rowc["brand_code"]);

                DataTable classRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_DEVICECLASSLIST_CH, rowc["deviceclass_id"]));
                if (classRs != null && classRs.Rows.Count > 0)
                {
                    DataRow rowdc = classRs.Rows[0];

                    userAgent.DeviceClass = UdgerParser.ConvertToStr(rowdc["name"]);
                    userAgent.DeviceClassCode = UdgerParser.ConvertToStr(rowdc["name_code"]);
                    userAgent.DeviceClassIcon = UdgerParser.ConvertToStr(rowdc["icon"]);
                    userAgent.DeviceClassIconBig = UdgerParser.ConvertToStr(rowdc["icon_big"]);
                    userAgent.DeviceClassInfoUrl = @"https://udger.com/resources/ua-list/device-detail?device=" + UdgerParser.ConvertToStr(rowdc["name"]);
                }
            }
        }
    }

    // ---- device class fallback, chosen from the mobile hint ----
    if (String.IsNullOrEmpty(userAgent.DeviceClass) && !String.IsNullOrEmpty(userAgent.UaClassCode))
    {
        DataTable devList = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_DEVICECLASSLISTMOBILE_CH, header.SecChUaMobile));
        if (devList != null && devList.Rows.Count >= 1)
        {
            DataRow r = devList.Rows[0];
            userAgent.DeviceClass = UdgerParser.ConvertToStr(r["name"]);
            userAgent.DeviceClassCode = UdgerParser.ConvertToStr(r["name_code"]);
            userAgent.DeviceClassIcon = UdgerParser.ConvertToStr(r["icon"]);
            userAgent.DeviceClassIconBig = UdgerParser.ConvertToStr(r["icon_big"]);
            userAgent.DeviceClassInfoUrl = @"https://udger.com/resources/ua-list/device-detail?device=" + UdgerParser.ConvertToStr(r["name"]);
        }
    }
}

// Doubles single quotes so the text cannot terminate a single-quoted SQL string literal.
private static string escapeSqlLiteral(string text)
{
    return text == null ? "" : text.Replace("'", "''");
}
577
/// <summary>
/// Resolves the device market name and brand: runs the device regexes selected for
/// the detected OS against the user-agent string and looks the captured model up.
/// </summary>
private void processDeviceBrand()
{
    DataTable devRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_DEVICE_REGEX, this.userAgent.OsFamilyCode, this.userAgent.OsCode));
    if (devRs == null)
        return;

    foreach (DataRow row in devRs.Rows)
    {
        String devId = UdgerParser.ConvertToStr(row["id"]);
        String regex = UdgerParser.ConvertToStr(row["regstring"]);

        // ConvertToStr never returns null, so the usable-row test has to be on emptiness.
        if (devId == "" || regex == "")
            continue;

        Match match = new PerlRegExpConverter(regex, "", Encoding.UTF8).Regex.Match(this.ua);
        if (!match.Success)
            continue;

        // The capture comes from the raw user-agent string; double any quote so it
        // cannot break out of the SQL string literal it is inserted into.
        // NOTE(review): assumes the template single-quotes this value - confirm.
        string model = match.Groups[1].ToString().Replace("'", "''");

        DataTable devNameListRs = dt.selectQuery(String.Format(UdgerSqlQuery.SQL_DEVICE_NAME_LIST, devId, model));
        if (devNameListRs == null || devNameListRs.Rows.Count == 0)
            continue;

        DataRow r = devNameListRs.Rows[0];
        userAgent.DeviceMarketname = UdgerParser.ConvertToStr(r["marketname"]);
        userAgent.DeviceBrand = UdgerParser.ConvertToStr(r["brand"]);
        userAgent.DeviceBrandCode = UdgerParser.ConvertToStr(r["brand_code"]);
        userAgent.DeviceBrandHomepage = UdgerParser.ConvertToStr(r["brand_url"]);
        userAgent.DeviceBrandIcon = UdgerParser.ConvertToStr(r["icon"]);
        userAgent.DeviceBrandIconBig = UdgerParser.ConvertToStr(r["icon_big"]);
        userAgent.DeviceBrandInfoUrl = @"https://udger.com/resources/ua-list/devices-brand-detail?brand=" + UdgerParser.ConvertToStr(r["brand_code"]);
        break;
    }
}
615 #endregion
616
617 #region prepare data methods
618
/// <summary>
/// Copies a client or crawler row into <c>userAgent</c>. For non-crawler rows the
/// row's regex is run against <c>ua</c> and, on a match, capture group 1 becomes the version.
/// </summary>
/// <param name="_row">Row produced by the crawler or client query.</param>
/// <param name="crawler">True for a crawler row; the regstring column is then not read.</param>
/// <param name="clientId">Receives the row's client_id.</param>
/// <param name="classId">Receives the row's class_id.</param>
private void prepareUa(DataRow _row, Boolean crawler, ref int clientId, ref int classId)
{
    UserAgent target = userAgent;

    target.Ua = ConvertToStr(_row["ua"]);
    target.UaVersion = ConvertToStr(_row["ua_version"]);
    target.UaVersionMajor = ConvertToStr(_row["ua_version_major"]);

    string pattern = crawler ? "" : ConvertToStr(_row["regstring"]);
    if (pattern != "")
    {
        Match hit = new PerlRegExpConverter(pattern, "", Encoding.UTF8).Regex.Match(this.ua);
        if (hit.Success)
        {
            // Groups[1] is never null; it is an empty group when the pattern has no capture.
            string version = ConvertToStr(hit.Groups[1]);
            target.Ua = ConvertToStr(_row["ua"]) + " " + version;
            target.UaVersion = version;
            target.UaVersionMajor = version.Split('.')[0];
        }
    }

    clientId = ConvertToInt(_row["client_id"]);
    classId = ConvertToInt(_row["class_id"]);

    target.CrawlerCategory = ConvertToStr(_row["crawler_category"]);
    target.CrawlerCategoryCode = ConvertToStr(_row["crawler_category_code"]);
    target.CrawlerLastSeen = ConvertToStr(_row["crawler_last_seen"]);
    target.CrawlerRespectRobotstxt = ConvertToStr(_row["crawler_respect_robotstxt"]);
    target.UaString = this.ua;
    target.UaClass = ConvertToStr(_row["ua_class"]);
    target.UaClassCode = ConvertToStr(_row["ua_class_code"]);
    target.UaUptodateCurrentVersion = ConvertToStr(_row["ua_uptodate_current_version"]);
    target.UaFamily = ConvertToStr(_row["ua_family"]);
    target.UaFamilyCode = ConvertToStr(_row["ua_family_code"]);
    target.UaFamilyHompage = ConvertToStr(_row["ua_family_homepage"]);
    target.UaFamilyVendor = ConvertToStr(_row["ua_family_vendor"]);
    target.UaFamilyVendorCode = ConvertToStr(_row["ua_family_vendor_code"]);
    target.UaFamilyVendorHomepage = ConvertToStr(_row["ua_family_vendor_homepage"]);
    target.UaFamilyIcon = ConvertToStr(_row["ua_family_icon"]);
    target.UaFamilyIconBig = ConvertToStr(_row["ua_family_icon_big"]);
    target.UaFamilyInfoUrl = ConvertToStr(_row["ua_family_info_url"]);
    target.UaEngine = ConvertToStr(_row["ua_engine"]);
}
/// <summary>
/// Copies the operating-system columns of a row into <c>userAgent</c>.
/// </summary>
/// <param name="_row">Row produced by one of the OS queries.</param>
/// <param name="_osId">Not modified by this method.</param>
private void prepareOs(DataRow _row, ref int _osId)
{
    UserAgent target = userAgent;

    target.Os = ConvertToStr(_row["os"]);
    target.OsCode = ConvertToStr(_row["os_code"]);
    target.OsHomepage = ConvertToStr(_row["os_home_page"]);
    target.OsIcon = ConvertToStr(_row["os_icon"]);
    target.OsIconBig = ConvertToStr(_row["os_icon_big"]);
    target.OsInfoUrl = ConvertToStr(_row["os_info_url"]);
    target.OsFamily = ConvertToStr(_row["os_family"]);
    target.OsFamilyCode = ConvertToStr(_row["os_family_code"]);
    target.OsFamilyVendor = ConvertToStr(_row["os_family_vendor"]);
    target.OsFamilyVendorCode = ConvertToStr(_row["os_family_vendor_code"]);
    // The column name is spelled "vedor" here; it is reproduced as-is.
    target.OsFamilyVendorHomepage = ConvertToStr(_row["os_family_vedor_homepage"]);
}
682
/// <summary>
/// Copies the device-class columns of a row into <c>userAgent</c>.
/// </summary>
/// <param name="_row">Row produced by the device or client-class query.</param>
/// <param name="_deviceClassId">Not modified by this method.</param>
private void prepareDevice(DataRow _row, ref int _deviceClassId)
{
    UserAgent target = userAgent;

    target.DeviceClass = ConvertToStr(_row["device_class"]);
    target.DeviceClassCode = ConvertToStr(_row["device_class_code"]);
    target.DeviceClassIcon = ConvertToStr(_row["device_class_icon"]);
    target.DeviceClassIconBig = ConvertToStr(_row["device_class_icon_big"]);
    target.DeviceClassInfoUrl = ConvertToStr(_row["device_class_info_url"]);
}
/// <summary>
/// Copies a device-class-list row (name, name_code, icon, icon_big) into the
/// device class fields of <c>userAgent</c> and builds the info URL from the name.
/// </summary>
/// <param name="_row">Row produced by the device class list query.</param>
private void prepareUnrecoginzedDevice(DataRow _row)
{
    UserAgent target = userAgent;
    string className = ConvertToStr(_row["name"]);

    target.DeviceClass = className;
    target.DeviceClassCode = ConvertToStr(_row["name_code"]);
    target.DeviceClassIcon = ConvertToStr(_row["icon"]);
    target.DeviceClassIconBig = ConvertToStr(_row["icon_big"]);
    target.DeviceClassInfoUrl = @"https://udger.com/resources/ua-list/device-detail?device=" + className;
}
703
/// <summary>
/// Copies an IP-list row (IP classification plus the joined crawler columns) into
/// <c>ipAddress</c>. The crawler info URL is only built for rows classified as "crawler".
/// </summary>
/// <param name="_row">First row of the IP lookup query.</param>
private void prepareIp(DataRow _row)
{
    IPAddress target = ipAddress;

    // IP classification and location
    target.IpClassification = ConvertToStr(_row["ip_classification"]);
    target.IpClassificationCode = ConvertToStr(_row["ip_classification_code"]);
    target.IpLastSeen = ConvertToStr(_row["ip_last_seen"]);
    target.IpHostname = ConvertToStr(_row["ip_hostname"]);
    target.IpCountry = ConvertToStr(_row["ip_country"]);
    target.IpCountryCode = ConvertToStr(_row["ip_country_code"]);
    target.IpCity = ConvertToStr(_row["ip_city"]);

    // Crawler attached to the IP, if any
    target.CrawlerName = ConvertToStr(_row["name"]);
    target.CrawlerVer = ConvertToStr(_row["ver"]);
    target.CrawlerVerMajor = ConvertToStr(_row["ver_major"]);
    target.CrawlerFamily = ConvertToStr(_row["family"]);
    target.CrawlerFamilyCode = ConvertToStr(_row["family_code"]);
    target.CrawlerFamilyHomepage = ConvertToStr(_row["family_homepage"]);
    target.CrawlerFamilyVendor = ConvertToStr(_row["vendor"]);
    target.CrawlerFamilyVendorCode = ConvertToStr(_row["vendor_code"]);
    target.CrawlerFamilyVendorHomepage = ConvertToStr(_row["vendor_homepage"]);
    target.CrawlerFamilyIcon = ConvertToStr(_row["family_icon"]);
    target.CrawlerLastSeen = ConvertToStr(_row["last_seen"]);
    target.CrawlerCategory = ConvertToStr(_row["crawler_classification"]);
    target.CrawlerCategoryCode = ConvertToStr(_row["crawler_classification_code"]);
    target.CrawlerRespectRobotstxt = ConvertToStr(_row["respect_robotstxt"]);

    bool isCrawlerIp = target.IpClassificationCode == "crawler";
    if (isCrawlerIp)
    {
        target.CrawlerFamilyInfoUrl = "https://udger.com/resources/ua-list/bot-detail?bot=" + ConvertToStr(_row["family"]) + "#id" + ConvertToStr(_row["botid"]);
    }
}
730
/// <summary>
/// Copies a datacenter row (name, name_code, homepage) into <c>ipAddress</c>.
/// </summary>
/// <param name="_row">First row of the datacenter range query.</param>
private void prepareIpDataCenter(DataRow _row)
{
    IPAddress target = ipAddress;

    target.DatacenterName = ConvertToStr(_row["name"]);
    target.DatacenterNameCode = ConvertToStr(_row["name_code"]);
    target.DatacenterHomepage = ConvertToStr(_row["homepage"]);
}
737 #endregion
738
/// <summary>
/// Converts a database value to text, mapping null and <see cref="DBNull"/> to "".
/// </summary>
/// <param name="value">Value read from a data row, or any object.</param>
/// <returns>"" for null/DBNull, otherwise <c>value.ToString()</c>.</returns>
private static string ConvertToStr(object value)
{
    // DBNull is sealed, so the type test is equivalent to an exact type comparison.
    bool missing = value == null || value is DBNull;
    return missing ? "" : value.ToString();
}
745
/// <summary>
/// Converts a database value to an int, mapping null and <see cref="DBNull"/> to 0.
/// </summary>
/// <param name="value">Value read from a data row, or any object.</param>
/// <returns>0 for null/DBNull, otherwise <c>Convert.ToInt32(value)</c>.</returns>
private static int ConvertToInt(object value)
{
    // DBNull is sealed, so the type test is equivalent to an exact type comparison.
    bool missing = value == null || value is DBNull;
    return missing ? 0 : Convert.ToInt32(value);
}
/// <summary>
/// Parses a date/time string, returning the default <see cref="DateTime"/> when it cannot be parsed.
/// </summary>
/// <param name="value">Text to parse; may be null.</param>
/// <returns>The parsed value, or <c>default(DateTime)</c> on failure.</returns>
private static DateTime ConvertToDateTime(string value)
{
    DateTime parsed;
    // TryParse leaves the out value at DateTime.MinValue on failure, i.e. default(DateTime).
    return DateTime.TryParse(value, out parsed) ? parsed : default(DateTime);
}
759
760
/// <summary>
/// Determines whether a string is an IPv4 or IPv6 address.
/// </summary>
/// <param name="_ip">Text to examine.</param>
/// <param name="_retIp">Receives the address as re-formatted by <c>System.Net.IPAddress.ToString()</c>, or "" when the text does not parse.</param>
/// <returns>4 for IPv4, 6 for IPv6, 0 otherwise.</returns>
private int getIPAddressVersion(string _ip, out string _retIp)
{
    System.Net.IPAddress parsed;
    if (!System.Net.IPAddress.TryParse(_ip, out parsed))
    {
        _retIp = "";
        return 0;
    }

    _retIp = parsed.ToString();

    switch (parsed.AddressFamily)
    {
        case System.Net.Sockets.AddressFamily.InterNetwork:
            return 4;
        case System.Net.Sockets.AddressFamily.InterNetworkV6:
            return 6;
        default:
            return 0;
    }
}
777
/// <summary>
/// Converts a dotted IPv4 address to its unsigned 32-bit numeric value
/// (first octet most significant), e.g. "1.2.3.4" gives 16909060.
/// </summary>
/// <param name="addr">IPv4 address text.</param>
/// <returns>The numeric value in the range 0..4294967295.</returns>
/// <exception cref="FormatException"><paramref name="addr"/> is not a valid IP address.</exception>
/// <exception cref="ArgumentException"><paramref name="addr"/> is not an IPv4 address.</exception>
private long AddrToInt(string addr)
{
    // GetAddressBytes returns the octets in network order, which avoids the obsolete
    // IPAddress.Address property and any dependence on host byte order or int overflow.
    byte[] octets = System.Net.IPAddress.Parse(addr).GetAddressBytes();
    if (octets.Length != 4)
        throw new ArgumentException("Only IPv4 addresses can be converted", nameof(addr));

    return ((long)octets[0] << 24) | ((long)octets[1] << 16) | ((long)octets[2] << 8) | octets[3];
}
784
/// <summary>
/// Loads the shared regex lists and word detectors from the database the first
/// time it is called; later calls return immediately.
/// </summary>
/// <param name="connection">Data reader used to run the queries.</param>
/// <remarks>
/// Everything is built into locals first and <c>clientRegstringList</c>, which doubles
/// as the "already loaded" marker, is assigned last. If a query throws, nothing is
/// published and the next call retries instead of leaving the detectors null.
/// </remarks>
[MethodImpl(MethodImplOptions.Synchronized)]
private static void initStaticStructures(DataReader connection)
{
    if (clientRegstringList != null)
        return;

    List<IdRegString> clientList = prepareRegexpStruct(connection, "udger_client_regex");
    List<IdRegString> osList = prepareRegexpStruct(connection, "udger_os_regex");
    List<IdRegString> deviceList = prepareRegexpStruct(connection, "udger_deviceclass_regex");

    WordDetector clientWords = createWordDetector(connection, "udger_client_regex", "udger_client_regex_words");
    WordDetector deviceWords = createWordDetector(connection, "udger_deviceclass_regex", "udger_deviceclass_regex_words");
    WordDetector osWords = createWordDetector(connection, "udger_os_regex", "udger_os_regex_words");

    osRegstringList = osList;
    deviceRegstringList = deviceList;
    clientWordDetector = clientWords;
    deviceWordDetector = deviceWords;
    osWordDetector = osWords;

    // Marker last: only set once every other structure is in place.
    clientRegstringList = clientList;
}
799
/// <summary>
/// Builds a word detector holding the words of <paramref name="wordTableName"/> whose
/// ids appear in the word_id or word2_id column of <paramref name="regexTableName"/>.
/// </summary>
/// <param name="connection">Data reader used to run the queries.</param>
/// <param name="regexTableName">Regex table whose word ids decide which words are kept.</param>
/// <param name="wordTableName">Table with the id/word pairs.</param>
/// <returns>A detector containing the referenced words, lower-cased.</returns>
private static WordDetector createWordDetector(DataReader connection, String regexTableName, String wordTableName)
{
    HashSet<int> referencedIds = new HashSet<int>();
    addUsedWords(referencedIds, connection, regexTableName, "word_id");
    addUsedWords(referencedIds, connection, regexTableName, "word2_id");

    WordDetector detector = new WordDetector();

    DataTable words = connection.selectQuery("SELECT * FROM " + wordTableName);
    if (words == null)
        return detector;

    foreach (DataRow wordRow in words.Rows)
    {
        int wordId = UdgerParser.ConvertToInt(wordRow["id"]);
        if (!referencedIds.Contains(wordId))
            continue; // no regex refers to this word

        detector.addWord(wordId, UdgerParser.ConvertToStr(wordRow["word"]).ToLower());
    }

    return detector;
}
825
/// <summary>
/// Adds every value of one word-id column of a regex table to <paramref name="usedWords"/>.
/// </summary>
/// <param name="usedWords">Set that receives the ids (null column values are added as 0).</param>
/// <param name="connection">Data reader used to run the query.</param>
/// <param name="regexTableName">Table to read.</param>
/// <param name="wordIdColumn">Column holding the word ids.</param>
private static void addUsedWords(HashSet<int> usedWords, DataReader connection, String regexTableName, String wordIdColumn)
{
    DataTable idRows = connection.selectQuery("SELECT " + wordIdColumn + " FROM " + regexTableName);
    if (idRows == null)
        return;

    foreach (DataRow idRow in idRows.Rows)
    {
        int wordId = UdgerParser.ConvertToInt(idRow[wordIdColumn]);
        usedWords.Add(wordId);
    }
}
837
/// <summary>
/// Returns the id of the first entry in <paramref name="list"/> whose required words
/// were all found and whose pattern matches the user-agent string.
/// </summary>
/// <param name="uaString">User-agent text to match.</param>
/// <param name="foundClientWords">Ids of the words detected in the user-agent string.</param>
/// <param name="list">Candidates in priority order.</param>
/// <returns>The matching entry's id, or -1 when nothing matches.</returns>
private int findIdFromList(String uaString, HashSet<int> foundClientWords, List<IdRegString> list)
{
    foreach (IdRegString irs in list)
    {
        // A word id of 0 means the entry does not require that word.
        if (irs.wordId1 != 0 && !foundClientWords.Contains(irs.wordId1))
            continue;
        if (irs.wordId2 != 0 && !foundClientWords.Contains(irs.wordId2))
            continue;

        // Converting a pattern is the expensive step and the pattern set is fixed,
        // so each converted regex is kept in regexCache and reused on later calls.
        Regex searchTerm;
        if (!regexCache.TryGetValue(irs.pattern, out searchTerm))
        {
            searchTerm = new PerlRegExpConverter(irs.pattern, "", Encoding.UTF8).Regex;
            regexCache[irs.pattern] = searchTerm;
        }

        if (searchTerm.IsMatch(uaString))
            return irs.id;
    }

    return -1;
}
859
/// <summary>
/// Loads a regex table (rowid, regstring, word_id, word2_id) ordered by its sequence
/// column into a list of <c>IdRegString</c> entries.
/// </summary>
/// <param name="connection">Data reader used to run the query.</param>
/// <param name="regexpTableName">Name of the regex table.</param>
/// <returns>The entries in sequence order; empty when the query yields no table.</returns>
private static List<IdRegString> prepareRegexpStruct(DataReader connection, String regexpTableName)
{
    List<IdRegString> ret = new List<IdRegString>();
    DataTable rs = connection.selectQuery("SELECT rowid, regstring, word_id, word2_id FROM " + regexpTableName + " ORDER BY sequence");
    if (rs == null)
        return ret;

    // Built once for the whole table rather than once per row.
    Regex perlLiteral = new Regex(@"^/?(.*?)/si$");

    foreach (DataRow row in rs.Rows)
    {
        String regex = UdgerParser.ConvertToStr(row["regstring"]);

        // Group 0 is the whole match, so the /.../si wrapper is kept: the stored pattern
        // is later handed to PerlRegExpConverter as-is.
        // NOTE(review): presumably the converter needs the delimiters and flags - confirm.
        Match literal = perlLiteral.Match(regex);
        if (literal.Success)
            regex = literal.Groups[0].ToString();

        IdRegString irs = new IdRegString();
        irs.id = UdgerParser.ConvertToInt(row["rowid"]);
        irs.wordId1 = UdgerParser.ConvertToInt(row["word_id"]);
        irs.wordId2 = UdgerParser.ConvertToInt(row["word2_id"]);
        irs.pattern = regex;
        ret.Add(irs);
    }

    return ret;
}
887
/// <summary>
/// Tells whether at least one Sec-CH-UA client-hint value is present in <c>header</c>.
/// The User-Agent value alone does not count.
/// </summary>
/// <returns>True when any client-hint field is non-empty.</returns>
private bool isHeaderSetted()
{
    if (header == null)
        return false;

    // SecChUa needs to be tested only once; repeating the test cannot change the result.
    bool allEmpty =
        String.IsNullOrEmpty(header.SecChUa) &&
        String.IsNullOrEmpty(header.SecChUaFullVersionList) &&
        String.IsNullOrEmpty(header.SecChUaMobile) &&
        String.IsNullOrEmpty(header.SecChUaFullVersion) &&
        String.IsNullOrEmpty(header.SecChUaPlatform) &&
        String.IsNullOrEmpty(header.SecChUaPlatformVersion) &&
        String.IsNullOrEmpty(header.SecChUaModel);

    return !allEmpty;
}
902 #endregion
903 }
904}
UdgerParser(bool useLRUCash, int LRUCashCapacity)
Constructor
Definition: UdgerParser.cs:99
void SetDataDir(string dataDir, string fileName)
Set the data directory and DB filename
Definition: UdgerParser.cs:137
UdgerParser()
Constructor
Definition: UdgerParser.cs:68
void SetDataDir(string dataDir)
Set the data directory
Definition: UdgerParser.cs:121
UdgerParser(int LRUCashCapacity)
Constructor
Definition: UdgerParser.cs:83
void parse()
Parse the user-agent string and/or IP address
Definition: UdgerParser.cs:171