Provides an example of processing a CSV file containing evidence for device detection. The supplied file contains 20,000 examples of evidence representing HTTP headers; in this example each entry is a User-Agent string that is passed to the pipeline as `header.user-agent` evidence.
We create a device detection pipeline to read the data and output the time taken and information about the detection.
Requesting properties from a single component reduces detection time compared with requesting properties from multiple components. If you don't specify any properties to detect, then all properties are detected.
This example requires a local data file. The free 'Lite' data file can be acquired by
pulling the git submodules under this repository (run `git submodule update --recursive`)
or from the device-detection-data
GitHub repository.
The Lite data file is only used for illustration, and has limited accuracy and capabilities.
Find out about the more capable data files that are available on our
pricing page.
49 import multiprocessing
as mp
61 flowdata = pipeline.create_flowdata()
64 flowdata.evidence.add(
"header.user-agent", user_agent)
74 if flowdata.device.ismobile.has_value():
75 return flowdata.device.ismobile.value()
81 pipeline = DeviceDetectionOnPremisePipelineBuilder(
82 data_file_path=data_file,
84 performance_profile=
'MaxPerformance',
85 add_javascript_builder=
False,
86 restricted_properties=[
"ismobile"],
88 auto_update=
False).
build()
95 for user_agent
in user_agent_list:
100 results[
"unknown"] += 1
102 results[
"mobile"] += 1
104 results[
"notmobile"] += 1
106 output.put(results, list_number)
109 def run(data_file, skip = False):
117 for x
in range(threads):
118 processes.append(mp.Process(target=process_user_agent_list,
119 args=(data_file, split_lists[x], x, output, skip)))
133 results = [output.get()
for p
in processes]
138 return {
"time": total,
"result": results}
140 if __name__ ==
"__main__":
141 ap = argparse.ArgumentParser(description=
'Run detection benchmark.')
142 ap.add_argument(
'-d',
'--data_file', default=
'', help=
'Path to data file')
143 ap.add_argument(
'-u',
'--user_agents_file', default=
'src/fiftyone_devicedetection_onpremise/cxx/device-detection-data/20000 User Agents.csv', help=
'Path to user agents evidence file')
144 ap.add_argument(
'-j',
'--json_output', default=
'', help=
'Output results in JSON format')
145 args = ap.parse_args()
146 if args.data_file ==
"":
147 args.data_file = ExampleUtils.find_file(
"51Degrees-LiteV4.1.hash")
150 with open(args.user_agents_file, newline=
'')
as file:
151 reader = csv.reader(file)
152 user_agents = list(reader)
154 number_of_user_agents = len(user_agents)
156 print(
"Processing " + str(number_of_user_agents) +
" user agents")
160 threads = mp.cpu_count()
162 print(
"Using " + str(threads) +
" threads")
164 chunk_size = int(number_of_user_agents / threads)
167 split_lists = [user_agents[x:x+chunk_size]
168 for x
in range(0, len(user_agents), chunk_size)]
170 calibration =
run(args.data_file, skip=
True)
172 real =
run(args.data_file, skip=
False)
174 real_time = real[
"time"]
176 print(
"Total time (seconds): " + str(real_time) +
" seconds")
177 print(
"Time per user agent (ms): " + str((real_time / number_of_user_agents) * 1000))
179 if args.json_output !=
"":
181 "DetectionsPerSecond": 1.0 / (real_time / number_of_user_agents),
182 "MsPerDetection": real_time * 1000 / number_of_user_agents
184 with open(args.json_output,
"w")
as file:
185 print(json.dumps(results), file = file)
193 for result
in real[
"result"]:
194 final_result[
"unknown"] += result[
"unknown"]
195 final_result[
"mobile"] += result[
"mobile"]
196 final_result[
"notmobile"] += result[
"notmobile"]
198 print(
"Results", final_result)