\r\n

51Degrees Device Detection Python  4.4

Device Detection services for 51Degrees Pipeline

onpremise/performance.py

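Provides an example of processing a CSV file containing evidence for device detection. There are 20,000 examples in the supplied file of evidence representing HTTP Headers. For example, the first column of each row holds a User-Agent string, which is added to the pipeline as `header.user-agent` evidence.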

We create a device detection pipeline to read the data and output the time taken and information about the detection.

Requesting properties from a single component reduces detection time compared with requesting properties from multiple components. If you don't specify any properties to detect, then all properties are detected.

Please review performance options and hash dataset options for more information about adjusting performance.

This example is available in full on GitHub.

This example requires a local data file. The free 'Lite' data file can be acquired by pulling the git submodules under this repository (run `git submodule update --recursive`) or from the device-detection-data GitHub repository.

The Lite data file is only used for illustration, and has limited accuracy and capabilities. Find out about the more capable data files that are available on our pricing page

Required PyPi Dependencies:

1 # *********************************************************************
2 # This Original Work is copyright of 51 Degrees Mobile Experts Limited.
3 # Copyright 2023 51 Degrees Mobile Experts Limited, Davidson House,
4 # Forbury Square, Reading, Berkshire, United Kingdom RG1 3EU.
5 #
6 # This Original Work is licensed under the European Union Public Licence
7 # (EUPL) v.1.2 and is subject to its terms as set out below.
8 #
9 # If a copy of the EUPL was not distributed with this file, You can obtain
10 # one at https://opensource.org/licenses/EUPL-1.2.
11 #
12 # The 'Compatible Licences' set out in the Appendix to the EUPL (as may be
13 # amended by the European Commission) shall be deemed incompatible for
14 # the purposes of the Work and the provisions of the compatibility
15 # clause in Article 5 of the EUPL shall not apply.
16 #
17 # If using the Work as, or as part of, a network application, by
18 # including the attribution notice(s) required under Article 5 of the EUPL
19 # in the end user terms of the application under an appropriate heading,
20 # such notice(s) shall fulfill the requirements of that article.
21 # *********************************************************************
22 
44 
45 import argparse
46 import csv
47 import json
48 import time
49 import multiprocessing as mp
50 
52 # This example goes through a CSV of 20000 user agents and processes them, returning the time and information about the matches
53 from fiftyone_devicedetection_onpremise.devicedetection_onpremise_pipelinebuilder import DeviceDetectionOnPremisePipelineBuilder
54 
55 # Here we make a function that processes a user agent
56 # And returns if it is a mobile device
57 
def process_user_agent(user_agent):
    """Run device detection on one User-Agent string.

    Reads the module-level ``pipeline`` global, so that global must have been
    assigned before this function is called.

    :param user_agent: value supplied as ``header.user-agent`` evidence.
    :return: the value of the ``ismobile`` property when the engine produced
        one, otherwise ``None``.
    """
    # Build a fresh flowdata from the shared pipeline and feed it the evidence.
    data = pipeline.create_flowdata()  # pylint: disable=used-before-assignment
    data.evidence.add("header.user-agent", user_agent)
    data.process()

    # Only trust the property when the engine reports a meaningful result.
    is_mobile = data.device.ismobile
    return is_mobile.value() if is_mobile.has_value() else None
78 
def process_user_agent_list(data_file, user_agent_list, list_number, output, skip=False):
    """Detect every user agent in one chunk and report the tally on a queue.

    Builds an on-premise pipeline restricted to the ``ismobile`` property,
    stores it in the module-level ``pipeline`` global (which
    ``process_user_agent`` reads), classifies each row and finally puts one
    dict of counts on ``output``.

    :param data_file: path of the device detection data file.
    :param user_agent_list: rows whose first element is the User-Agent string.
    :param list_number: index of this chunk. Accepted for interface
        compatibility only; it is not sent to the queue.
    :param output: queue that receives exactly one
        ``{"mobile", "notmobile", "unknown"}`` dict.
    :param skip: when true the pipeline is still built but no row is
        processed, so the reported counts are all zero.
    """
    global pipeline
    pipeline = DeviceDetectionOnPremisePipelineBuilder(
        data_file_path=data_file,
        licence_keys="",
        performance_profile='MaxPerformance',
        add_javascript_builder=False,
        restricted_properties=["ismobile"],
        usage_sharing=False,
        auto_update=False).build()

    results = {
        "mobile": 0,
        "notmobile": 0,
        "unknown": 0,
    }

    # ``skip`` never changes during the loop, so test it once up front.
    if not skip:
        for user_agent in user_agent_list:
            result = process_user_agent(user_agent[0])
            # Count every row exactly once: no value -> unknown, otherwise
            # split on the truthiness of the detected value.
            if result is None:
                results["unknown"] += 1
            elif result:
                results["mobile"] += 1
            else:
                results["notmobile"] += 1

    # Queue.put's second positional parameter is the ``block`` flag, so the
    # chunk index must not be passed there; only the tally is enqueued.
    output.put(results)
107 
108 # Run the process
def run(data_file, skip=False):
    """Run one benchmark pass across all worker processes and time it.

    Starts ``threads`` processes (module global), giving process ``x`` the
    chunk ``split_lists[x]`` (module global), and collects one tally dict from
    each of them.

    :param data_file: path of the device detection data file passed to every
        worker.
    :param skip: forwarded to the workers; when true they build their pipeline
        but process no user agents.
    :return: ``{"time": elapsed_seconds, "result": [tally_dict, ...]}`` where
        the elapsed time covers starting the workers, collecting their results
        and joining them.
    """
    # Queue the workers use to hand back their tallies.
    output = mp.Queue()

    processes = [
        mp.Process(target=process_user_agent_list,
                   args=(data_file, split_lists[x], x, output, skip))  # pylint: disable=used-before-assignment
        for x in range(threads)  # pylint: disable=used-before-assignment
    ]

    # Start timer
    t0 = time.time()

    for p in processes:
        p.start()

    # Drain the queue BEFORE joining: a process that has put items on a
    # multiprocessing queue may not terminate until they have been consumed,
    # so joining first risks a deadlock.
    results = [output.get() for _ in processes]

    # Every worker has reported; now reap the processes.
    for p in processes:
        p.join()

    total = time.time() - t0

    return {"time": total, "result": results}
139 
if __name__ == "__main__":
    # Script entry point: parse options, load the evidence file, split it
    # across one worker per CPU, run the benchmark and report the results.
    ap = argparse.ArgumentParser(description='Run detection benchmark.')
    ap.add_argument('-d', '--data_file', default='', help='Path to data file')
    ap.add_argument('-u', '--user_agents_file', default='src/fiftyone_devicedetection_onpremise/cxx/device-detection-data/20000 User Agents.csv', help='Path to user agents evidence file')
    ap.add_argument('-j', '--json_output', default='', help='Output results in JSON format')
    args = ap.parse_args()
    if args.data_file == "":
        # NOTE(review): ExampleUtils is not imported in the visible part of
        # this listing - confirm the import exists in the full file.
        args.data_file = ExampleUtils.find_file("51Degrees-LiteV4.1.hash")

    # Read the user agents file as a list of rows. Rows without any column
    # cannot supply a User-Agent (workers read row[0]), so they are dropped.
    with open(args.user_agents_file, newline='') as file:
        user_agents = [row for row in csv.reader(file) if row]

    number_of_user_agents = len(user_agents)

    # The per-detection figures below divide by this count.
    if number_of_user_agents == 0:
        ap.error("No user agents found in " + args.user_agents_file)

    print(f"Processing {number_of_user_agents} user agents")

    # `threads` and `split_lists` are read as module globals by run().
    threads = mp.cpu_count()

    print(f"Using {threads} threads")

    # Deal the rows out round-robin so that there are exactly `threads`
    # chunks and every row belongs to one of them. Equal-size slicing with a
    # truncated chunk size left a remainder chunk that run() never used, and
    # produced a zero slice step when there were fewer rows than CPUs.
    split_lists = [user_agents[offset::threads] for offset in range(threads)]

    # Pass with skip=True: workers start and build their pipelines but detect
    # nothing. Its result is not used in the figures reported below.
    run(args.data_file, skip=True)

    real = run(args.data_file, skip=False)

    real_time = real["time"]

    print(f"Total time (seconds): {real_time} seconds")
    print(f"Time per user agent (ms): {(real_time / number_of_user_agents) * 1000}")

    if args.json_output != "":
        results = {
            "DetectionsPerSecond": 1.0 / (real_time / number_of_user_agents),
            "MsPerDetection": real_time * 1000 / number_of_user_agents,
        }
        with open(args.json_output, "w") as file:
            print(json.dumps(results), file=file)

    # Sum the per-worker tallies into one overall tally.
    final_result = {
        "mobile": 0,
        "notmobile": 0,
        "unknown": 0,
    }

    for result in real["result"]:
        for key in final_result:
            final_result[key] += result[key]

    print("Results", final_result)