diff -Naur linux-5.3-rc6/arch/x86/kernel/early-quirks.c linux-5.3-rc6-agd5fed/arch/x86/kernel/early-quirks.c --- linux-5.3-rc6/arch/x86/kernel/early-quirks.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/arch/x86/kernel/early-quirks.c 2019-08-31 15:01:11.825736165 -0500 @@ -549,6 +549,7 @@ INTEL_CNL_IDS(&gen9_early_ops), INTEL_ICL_11_IDS(&gen11_early_ops), INTEL_EHL_IDS(&gen11_early_ops), + INTEL_TGL_12_IDS(&gen11_early_ops), }; struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0); diff -Naur linux-5.3-rc6/.clang-format linux-5.3-rc6-agd5fed/.clang-format --- linux-5.3-rc6/.clang-format 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/.clang-format 1969-12-31 18:00:00.000000000 -0600 @@ -1,493 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# clang-format configuration file. Intended for clang-format >= 4. -# -# For more information, see: -# -# Documentation/process/clang-format.rst -# https://clang.llvm.org/docs/ClangFormat.html -# https://clang.llvm.org/docs/ClangFormatStyleOptions.html -# ---- -AccessModifierOffset: -4 -AlignAfterOpenBracket: Align -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false -#AlignEscapedNewlines: Left # Unknown to clang-format-4.0 -AlignOperands: true -AlignTrailingComments: false -AllowAllParametersOfDeclarationOnNextLine: false -AllowShortBlocksOnASingleLine: false -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: None -AllowShortIfStatementsOnASingleLine: false -AllowShortLoopsOnASingleLine: false -AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None -AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: false -BinPackArguments: true -BinPackParameters: true -BraceWrapping: - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: true - AfterNamespace: true - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - #AfterExternBlock: false # Unknown to clang-format-5.0 - BeforeCatch: false - BeforeElse: false - IndentBraces: false - #SplitEmptyFunction: true # Unknown to clang-format-4.0 - #SplitEmptyRecord: true # Unknown to clang-format-4.0 - #SplitEmptyNamespace: true # Unknown to clang-format-4.0 -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Custom -#BreakBeforeInheritanceComma: false # Unknown to clang-format-4.0 -BreakBeforeTernaryOperators: false -BreakConstructorInitializersBeforeComma: false -#BreakConstructorInitializers: BeforeComma # Unknown to clang-format-4.0 -BreakAfterJavaFieldAnnotations: false -BreakStringLiterals: false -ColumnLimit: 80 -CommentPragmas: '^ IWYU pragma:' -#CompactNamespaces: false # Unknown to clang-format-4.0 -ConstructorInitializerAllOnOneLineOrOnePerLine: false -ConstructorInitializerIndentWidth: 8 -ContinuationIndentWidth: 8 -Cpp11BracedListStyle: false -DerivePointerAlignment: false -DisableFormat: false -ExperimentalAutoDetectBinPacking: false -#FixNamespaceComments: false # Unknown to clang-format-4.0 - -# Taken from: -# git grep -h '^#define [^[:space:]]*for_each[^[:space:]]*(' include/ \ -# | sed "s,^#define \([^[:space:]]*for_each[^[:space:]]*\)(.*$, - '\1'," \ -# | sort | uniq -ForEachMacros: - - 'apei_estatus_for_each_section' - - 'ata_for_each_dev' - - 'ata_for_each_link' - - '__ata_qc_for_each' - - 'ata_qc_for_each' - - 'ata_qc_for_each_raw' - - 'ata_qc_for_each_with_internal' - - 'ax25_for_each' - - 'ax25_uid_for_each' - - '__bio_for_each_bvec' - - 'bio_for_each_bvec' - - 'bio_for_each_integrity_vec' - - 
'__bio_for_each_segment' - - 'bio_for_each_segment' - - 'bio_for_each_segment_all' - - 'bio_list_for_each' - - 'bip_for_each_vec' - - 'blkg_for_each_descendant_post' - - 'blkg_for_each_descendant_pre' - - 'blk_queue_for_each_rl' - - 'bond_for_each_slave' - - 'bond_for_each_slave_rcu' - - 'bpf_for_each_spilled_reg' - - 'btree_for_each_safe128' - - 'btree_for_each_safe32' - - 'btree_for_each_safe64' - - 'btree_for_each_safel' - - 'card_for_each_dev' - - 'cgroup_taskset_for_each' - - 'cgroup_taskset_for_each_leader' - - 'cpufreq_for_each_entry' - - 'cpufreq_for_each_entry_idx' - - 'cpufreq_for_each_valid_entry' - - 'cpufreq_for_each_valid_entry_idx' - - 'css_for_each_child' - - 'css_for_each_descendant_post' - - 'css_for_each_descendant_pre' - - 'device_for_each_child_node' - - 'drm_atomic_crtc_for_each_plane' - - 'drm_atomic_crtc_state_for_each_plane' - - 'drm_atomic_crtc_state_for_each_plane_state' - - 'drm_atomic_for_each_plane_damage' - - 'drm_connector_for_each_possible_encoder' - - 'drm_for_each_connector_iter' - - 'drm_for_each_crtc' - - 'drm_for_each_encoder' - - 'drm_for_each_encoder_mask' - - 'drm_for_each_fb' - - 'drm_for_each_legacy_plane' - - 'drm_for_each_plane' - - 'drm_for_each_plane_mask' - - 'drm_for_each_privobj' - - 'drm_mm_for_each_hole' - - 'drm_mm_for_each_node' - - 'drm_mm_for_each_node_in_range' - - 'drm_mm_for_each_node_safe' - - 'flow_action_for_each' - - 'for_each_active_drhd_unit' - - 'for_each_active_iommu' - - 'for_each_available_child_of_node' - - 'for_each_bio' - - 'for_each_board_func_rsrc' - - 'for_each_bvec' - - 'for_each_card_components' - - 'for_each_card_links' - - 'for_each_card_links_safe' - - 'for_each_card_prelinks' - - 'for_each_card_rtds' - - 'for_each_card_rtds_safe' - - 'for_each_cgroup_storage_type' - - 'for_each_child_of_node' - - 'for_each_clear_bit' - - 'for_each_clear_bit_from' - - 'for_each_cmsghdr' - - 'for_each_compatible_node' - - 'for_each_component_dais' - - 'for_each_component_dais_safe' - - 'for_each_comp_order' - - 'for_each_console' - - 'for_each_cpu' - - 'for_each_cpu_and' - - 'for_each_cpu_not' - - 'for_each_cpu_wrap' - - 'for_each_dev_addr' - - 'for_each_dma_cap_mask' - - 'for_each_dpcm_be' - - 'for_each_dpcm_be_rollback' - - 'for_each_dpcm_be_safe' - - 'for_each_dpcm_fe' - - 'for_each_drhd_unit' - - 'for_each_dss_dev' - - 'for_each_efi_memory_desc' - - 'for_each_efi_memory_desc_in_map' - - 'for_each_element' - - 'for_each_element_extid' - - 'for_each_element_id' - - 'for_each_endpoint_of_node' - - 'for_each_evictable_lru' - - 'for_each_fib6_node_rt_rcu' - - 'for_each_fib6_walker_rt' - - 'for_each_free_mem_range' - - 'for_each_free_mem_range_reverse' - - 'for_each_func_rsrc' - - 'for_each_hstate' - - 'for_each_if' - - 'for_each_iommu' - - 'for_each_ip_tunnel_rcu' - - 'for_each_irq_nr' - - 'for_each_link_codecs' - - 'for_each_lru' - - 'for_each_matching_node' - - 'for_each_matching_node_and_match' - - 'for_each_memblock' - - 'for_each_memblock_type' - - 'for_each_memcg_cache_index' - - 'for_each_mem_pfn_range' - - 'for_each_mem_range' - - 'for_each_mem_range_rev' - - 'for_each_migratetype_order' - - 'for_each_msi_entry' - - 'for_each_msi_entry_safe' - - 'for_each_net' - - 'for_each_netdev' - - 'for_each_netdev_continue' - - 'for_each_netdev_continue_rcu' - - 'for_each_netdev_feature' - - 'for_each_netdev_in_bond_rcu' - - 'for_each_netdev_rcu' - - 'for_each_netdev_reverse' - - 'for_each_netdev_safe' - - 'for_each_net_rcu' - - 'for_each_new_connector_in_state' - - 'for_each_new_crtc_in_state' - - 'for_each_new_mst_mgr_in_state' - - 
'for_each_new_plane_in_state' - - 'for_each_new_private_obj_in_state' - - 'for_each_node' - - 'for_each_node_by_name' - - 'for_each_node_by_type' - - 'for_each_node_mask' - - 'for_each_node_state' - - 'for_each_node_with_cpus' - - 'for_each_node_with_property' - - 'for_each_of_allnodes' - - 'for_each_of_allnodes_from' - - 'for_each_of_cpu_node' - - 'for_each_of_pci_range' - - 'for_each_old_connector_in_state' - - 'for_each_old_crtc_in_state' - - 'for_each_old_mst_mgr_in_state' - - 'for_each_oldnew_connector_in_state' - - 'for_each_oldnew_crtc_in_state' - - 'for_each_oldnew_mst_mgr_in_state' - - 'for_each_oldnew_plane_in_state' - - 'for_each_oldnew_plane_in_state_reverse' - - 'for_each_oldnew_private_obj_in_state' - - 'for_each_old_plane_in_state' - - 'for_each_old_private_obj_in_state' - - 'for_each_online_cpu' - - 'for_each_online_node' - - 'for_each_online_pgdat' - - 'for_each_pci_bridge' - - 'for_each_pci_dev' - - 'for_each_pci_msi_entry' - - 'for_each_populated_zone' - - 'for_each_possible_cpu' - - 'for_each_present_cpu' - - 'for_each_prime_number' - - 'for_each_prime_number_from' - - 'for_each_process' - - 'for_each_process_thread' - - 'for_each_property_of_node' - - 'for_each_registered_fb' - - 'for_each_reserved_mem_region' - - 'for_each_rtd_codec_dai' - - 'for_each_rtd_codec_dai_rollback' - - 'for_each_rtdcom' - - 'for_each_rtdcom_safe' - - 'for_each_set_bit' - - 'for_each_set_bit_from' - - 'for_each_sg' - - 'for_each_sg_dma_page' - - 'for_each_sg_page' - - 'for_each_sibling_event' - - 'for_each_subelement' - - 'for_each_subelement_extid' - - 'for_each_subelement_id' - - '__for_each_thread' - - 'for_each_thread' - - 'for_each_zone' - - 'for_each_zone_zonelist' - - 'for_each_zone_zonelist_nodemask' - - 'fwnode_for_each_available_child_node' - - 'fwnode_for_each_child_node' - - 'fwnode_graph_for_each_endpoint' - - 'gadget_for_each_ep' - - 'genradix_for_each' - - 'genradix_for_each_from' - - 'hash_for_each' - - 'hash_for_each_possible' - - 'hash_for_each_possible_rcu' - - 'hash_for_each_possible_rcu_notrace' - - 'hash_for_each_possible_safe' - - 'hash_for_each_rcu' - - 'hash_for_each_safe' - - 'hctx_for_each_ctx' - - 'hlist_bl_for_each_entry' - - 'hlist_bl_for_each_entry_rcu' - - 'hlist_bl_for_each_entry_safe' - - 'hlist_for_each' - - 'hlist_for_each_entry' - - 'hlist_for_each_entry_continue' - - 'hlist_for_each_entry_continue_rcu' - - 'hlist_for_each_entry_continue_rcu_bh' - - 'hlist_for_each_entry_from' - - 'hlist_for_each_entry_from_rcu' - - 'hlist_for_each_entry_rcu' - - 'hlist_for_each_entry_rcu_bh' - - 'hlist_for_each_entry_rcu_notrace' - - 'hlist_for_each_entry_safe' - - '__hlist_for_each_rcu' - - 'hlist_for_each_safe' - - 'hlist_nulls_for_each_entry' - - 'hlist_nulls_for_each_entry_from' - - 'hlist_nulls_for_each_entry_rcu' - - 'hlist_nulls_for_each_entry_safe' - - 'i3c_bus_for_each_i2cdev' - - 'i3c_bus_for_each_i3cdev' - - 'ide_host_for_each_port' - - 'ide_port_for_each_dev' - - 'ide_port_for_each_present_dev' - - 'idr_for_each_entry' - - 'idr_for_each_entry_continue' - - 'idr_for_each_entry_ul' - - 'inet_bind_bucket_for_each' - - 'inet_lhash2_for_each_icsk_rcu' - - 'key_for_each' - - 'key_for_each_safe' - - 'klp_for_each_func' - - 'klp_for_each_func_safe' - - 'klp_for_each_func_static' - - 'klp_for_each_object' - - 'klp_for_each_object_safe' - - 'klp_for_each_object_static' - - 'kvm_for_each_memslot' - - 'kvm_for_each_vcpu' - - 'list_for_each' - - 'list_for_each_codec' - - 'list_for_each_codec_safe' - - 'list_for_each_entry' - - 'list_for_each_entry_continue' - - 
'list_for_each_entry_continue_rcu' - - 'list_for_each_entry_continue_reverse' - - 'list_for_each_entry_from' - - 'list_for_each_entry_from_rcu' - - 'list_for_each_entry_from_reverse' - - 'list_for_each_entry_lockless' - - 'list_for_each_entry_rcu' - - 'list_for_each_entry_reverse' - - 'list_for_each_entry_safe' - - 'list_for_each_entry_safe_continue' - - 'list_for_each_entry_safe_from' - - 'list_for_each_entry_safe_reverse' - - 'list_for_each_prev' - - 'list_for_each_prev_safe' - - 'list_for_each_safe' - - 'llist_for_each' - - 'llist_for_each_entry' - - 'llist_for_each_entry_safe' - - 'llist_for_each_safe' - - 'media_device_for_each_entity' - - 'media_device_for_each_intf' - - 'media_device_for_each_link' - - 'media_device_for_each_pad' - - 'mp_bvec_for_each_page' - - 'mp_bvec_for_each_segment' - - 'nanddev_io_for_each_page' - - 'netdev_for_each_lower_dev' - - 'netdev_for_each_lower_private' - - 'netdev_for_each_lower_private_rcu' - - 'netdev_for_each_mc_addr' - - 'netdev_for_each_uc_addr' - - 'netdev_for_each_upper_dev_rcu' - - 'netdev_hw_addr_list_for_each' - - 'nft_rule_for_each_expr' - - 'nla_for_each_attr' - - 'nla_for_each_nested' - - 'nlmsg_for_each_attr' - - 'nlmsg_for_each_msg' - - 'nr_neigh_for_each' - - 'nr_neigh_for_each_safe' - - 'nr_node_for_each' - - 'nr_node_for_each_safe' - - 'of_for_each_phandle' - - 'of_property_for_each_string' - - 'of_property_for_each_u32' - - 'pci_bus_for_each_resource' - - 'ping_portaddr_for_each_entry' - - 'plist_for_each' - - 'plist_for_each_continue' - - 'plist_for_each_entry' - - 'plist_for_each_entry_continue' - - 'plist_for_each_entry_safe' - - 'plist_for_each_safe' - - 'pnp_for_each_card' - - 'pnp_for_each_dev' - - 'protocol_for_each_card' - - 'protocol_for_each_dev' - - 'queue_for_each_hw_ctx' - - 'radix_tree_for_each_slot' - - 'radix_tree_for_each_tagged' - - 'rbtree_postorder_for_each_entry_safe' - - 'rdma_for_each_port' - - 'resource_list_for_each_entry' - - 'resource_list_for_each_entry_safe' - - 'rhl_for_each_entry_rcu' - - 'rhl_for_each_rcu' - - 'rht_for_each' - - 'rht_for_each_from' - - 'rht_for_each_entry' - - 'rht_for_each_entry_from' - - 'rht_for_each_entry_rcu' - - 'rht_for_each_entry_rcu_from' - - 'rht_for_each_entry_safe' - - 'rht_for_each_rcu' - - 'rht_for_each_rcu_from' - - '__rq_for_each_bio' - - 'rq_for_each_bvec' - - 'rq_for_each_segment' - - 'scsi_for_each_prot_sg' - - 'scsi_for_each_sg' - - 'sctp_for_each_hentry' - - 'sctp_skb_for_each' - - 'shdma_for_each_chan' - - '__shost_for_each_device' - - 'shost_for_each_device' - - 'sk_for_each' - - 'sk_for_each_bound' - - 'sk_for_each_entry_offset_rcu' - - 'sk_for_each_from' - - 'sk_for_each_rcu' - - 'sk_for_each_safe' - - 'sk_nulls_for_each' - - 'sk_nulls_for_each_from' - - 'sk_nulls_for_each_rcu' - - 'snd_array_for_each' - - 'snd_pcm_group_for_each_entry' - - 'snd_soc_dapm_widget_for_each_path' - - 'snd_soc_dapm_widget_for_each_path_safe' - - 'snd_soc_dapm_widget_for_each_sink_path' - - 'snd_soc_dapm_widget_for_each_source_path' - - 'tb_property_for_each' - - 'tcf_exts_for_each_action' - - 'udp_portaddr_for_each_entry' - - 'udp_portaddr_for_each_entry_rcu' - - 'usb_hub_for_each_child' - - 'v4l2_device_for_each_subdev' - - 'v4l2_m2m_for_each_dst_buf' - - 'v4l2_m2m_for_each_dst_buf_safe' - - 'v4l2_m2m_for_each_src_buf' - - 'v4l2_m2m_for_each_src_buf_safe' - - 'virtio_device_for_each_vq' - - 'xa_for_each' - - 'xa_for_each_marked' - - 'xa_for_each_start' - - 'xas_for_each' - - 'xas_for_each_conflict' - - 'xas_for_each_marked' - - 'zorro_for_each_dev' - -#IncludeBlocks: Preserve # 
Unknown to clang-format-5.0 -IncludeCategories: - Regex: '.*' - Priority: 1 -IncludeIsMainRegex: '(Test)?$' -IndentCaseLabels: false -#IndentPPDirectives: None # Unknown to clang-format-5.0 -IndentWidth: 8 -IndentWrappedFunctionNames: false -JavaScriptQuotes: Leave -JavaScriptWrapImports: true -KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: Inner -#ObjCBinPackProtocolList: Auto # Unknown to clang-format-5.0 -ObjCBlockIndentWidth: 8 -ObjCSpaceAfterProperty: true -ObjCSpaceBeforeProtocolList: true - -# Taken from git's rules -#PenaltyBreakAssignment: 10 # Unknown to clang-format-4.0 -PenaltyBreakBeforeFirstCallParameter: 30 -PenaltyBreakComment: 10 -PenaltyBreakFirstLessLess: 0 -PenaltyBreakString: 10 -PenaltyExcessCharacter: 100 -PenaltyReturnTypeOnItsOwnLine: 60 - -PointerAlignment: Right -ReflowComments: false -SortIncludes: false -#SortUsingDeclarations: false # Unknown to clang-format-4.0 -SpaceAfterCStyleCast: false -SpaceAfterTemplateKeyword: true -SpaceBeforeAssignmentOperators: true -#SpaceBeforeCtorInitializerColon: true # Unknown to clang-format-5.0 -#SpaceBeforeInheritanceColon: true # Unknown to clang-format-5.0 -SpaceBeforeParens: ControlStatements -#SpaceBeforeRangeBasedForLoopColon: true # Unknown to clang-format-5.0 -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInAngles: false -SpacesInContainerLiterals: false -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false -SpacesInSquareBrackets: false -Standard: Cpp03 -TabWidth: 8 -UseTab: Always -... diff -Naur linux-5.3-rc6/.cocciconfig linux-5.3-rc6-agd5fed/.cocciconfig --- linux-5.3-rc6/.cocciconfig 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/.cocciconfig 1969-12-31 18:00:00.000000000 -0600 @@ -1,3 +0,0 @@ -[spatch] - options = --timeout 200 - options = --use-gitgrep diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/arm,pl11x.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/arm,pl11x.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/arm,pl11x.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/arm,pl11x.txt 2019-08-31 15:01:11.823736165 -0500 @@ -39,9 +39,11 @@ - port: describes LCD panel signals, following the common binding for video transmitter interfaces; see - Documentation/devicetree/bindings/media/video-interfaces.txt; - when it is a TFT panel, the port's endpoint must define the - following property: + Documentation/devicetree/bindings/media/video-interfaces.txt + +Deprecated properties: + The port's endpoint subnode used to carry the following property, + which is now deprecated. 
Drivers should be able to survive without it: - arm,pl11x,tft-r0g0b0-pads: an array of three 32-bit values, defining the way CLD pads are wired up; first value @@ -80,7 +82,6 @@ port { clcd_pads: endpoint { remote-endpoint = <&clcd_panel>; - arm,pl11x,tft-r0g0b0-pads = <0 8 16>; }; }; diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/bridge/sii902x.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/bridge/sii902x.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/bridge/sii902x.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/bridge/sii902x.txt 2019-08-31 15:01:11.823736165 -0500 @@ -26,9 +26,8 @@ - clocks: phandle and clock specifier for each clock listed in the clock-names property - clock-names: "mclk" - Describes SII902x MCLK input. MCLK is used to produce - HDMI audio CTS values. This property is required if - "#sound-dai-cells"-property is present. This property follows + Describes SII902x MCLK input. MCLK can be used to produce + HDMI audio CTS values. This property follows Documentation/devicetree/bindings/clock/clock-bindings.txt consumer binding. diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,26 +0,0 @@ -Ampire AM-480272H3TMQW-T01H 4.3" WQVGA TFT LCD panel - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. - -Required properties: -- compatible: should be "ampire,am-480272h3tmqw-t01h" - -Optional properties: -- power-supply: regulator to provide the supply voltage -- enable-gpios: GPIO pin to enable or disable the panel -- backlight: phandle of the backlight device attached to the panel - -Optional nodes: -- Video port for RGB input. 
- -Example: - panel_rgb: panel-rgb { - compatible = "ampire,am-480272h3tmqw-t01h"; - enable-gpios = <&gpioa 8 1>; - port { - panel_in_rgb: endpoint { - remote-endpoint = <&controller_out_rgb>; - }; - }; - }; diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ampire,am-480272h3tmqw-t01h.yaml 2019-08-31 15:01:11.823736165 -0500 @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/ampire,am-480272h3tmqw-t01h.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Ampire AM-480272H3TMQW-T01H 4.3" WQVGA TFT LCD panel + +maintainers: + - Yannick Fertre + - Thierry Reding + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: ampire,am-480272h3tmqw-t01h + + power-supply: true + enable-gpios: true + backlight: true + port: true + +required: + - compatible + +additionalProperties: false + +examples: + - | + panel_rgb: panel { + compatible = "ampire,am-480272h3tmqw-t01h"; + enable-gpios = <&gpioa 8 1>; + port { + panel_in_rgb: endpoint { + remote-endpoint = <&controller_out_rgb>; + }; + }; + }; + +... diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,9 +0,0 @@ -Armadeus ST0700 Adapt. A Santek ST0700I5Y-RBSLW 7.0" WVGA (800x480) TFT with -an adapter board. - -Required properties: -- compatible: "armadeus,st0700-adapt" -- power-supply: see panel-common.txt - -Optional properties: -- backlight: see panel-common.txt diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/armadeus,st0700-adapt.yaml 2019-08-31 15:01:11.823736165 -0500 @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/armadeus,st0700-adapt.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Armadeus ST0700 Adapter + +description: + A Santek ST0700I5Y-RBSLW 7.0" WVGA (800x480) TFT with an adapter board. + +maintainers: + - '"Sébastien Szymanski" ' + - Thierry Reding + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: armadeus,st0700-adapt + + power-supply: true + backlight: true + port: true + +additionalProperties: false + +required: + - compatible + - power-supply + +... 
diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/arm,versatile-tft-panel.txt 2019-08-31 15:01:11.823736165 -0500 @@ -10,7 +10,7 @@ - compatible: should be "arm,versatile-tft-panel" Required subnodes: -- port: see display/panel/panel-common.txt, graph.txt +- port: see display/panel/panel-common.yaml, graph.txt Example: diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,12 +0,0 @@ -Banana Pi 7" (S070WV20-CT16) TFT LCD Panel - -Required properties: -- compatible: should be "bananapi,s070wv20-ct16" -- power-supply: see ./panel-common.txt - -Optional properties: -- enable-gpios: see ./simple-panel.txt -- backlight: see ./simple-panel.txt - -This binding is compatible with the simple-panel binding, which is specified -in ./simple-panel.txt. diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/bananapi,s070wv20-ct16.yaml 2019-08-31 15:01:11.823736165 -0500 @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/bananapi,s070wv20-ct16.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Banana Pi 7" (S070WV20-CT16) TFT LCD Panel + +maintainers: + - Chen-Yu Tsai + - Thierry Reding + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: bananapi,s070wv20-ct16 + + power-supply: true + backlight: true + enable-gpios: true + port: true + +additionalProperties: false + +required: + - compatible + - power-supply + +... 
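
Unlike the Ampire conversion earlier in this patch, the bananapi,s070wv20-ct16 schema above carries no examples: section. A minimal node that should validate against it, sketched here with placeholder regulator and GPIO phandles that are not taken from the patch:

panel {
	compatible = "bananapi,s070wv20-ct16";
	power-supply = <&panel_supply>;			/* placeholder regulator phandle */
	enable-gpios = <&pio 7 27 GPIO_ACTIVE_HIGH>;	/* placeholder GPIO */
	backlight = <&backlight>;
};
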
diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/boe,himax8279d.txt 2019-08-31 15:01:11.823736165 -0500 @@ -0,0 +1,24 @@ +Boe Himax8279d 1200x1920 TFT LCD panel + +Required properties: +- compatible: should be "boe,himax8279d8p" and one of: "boe,himax8279d10p" +- reg: DSI virtual channel of the peripheral +- enable-gpios: panel enable gpio +- pp33-gpios: a GPIO phandle for the 3.3v pin that provides the supply voltage +- pp18-gpios: a GPIO phandle for the 1.8v pin that provides the supply voltage + +Optional properties: +- backlight: phandle of the backlight device attached to the panel + +Example: + + &mipi_dsi { + panel { + compatible = "boe,himax8279d8p", "boe,himax8279d10p"; + reg = <0>; + backlight = <&backlight>; + enable-gpios = <&gpio 45 GPIO_ACTIVE_HIGH>; + pp33-gpios = <&gpio 35 GPIO_ACTIVE_HIGH>; + pp18-gpios = <&gpio 36 GPIO_ACTIVE_HIGH>; + }; + }; diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,13 +0,0 @@ -DLC Display Co. DLC0700YZG-1 7.0" WSVGA TFT LCD panel - -Required properties: -- compatible: should be "dlc,dlc0700yzg-1" -- power-supply: See simple-panel.txt - -Optional properties: -- reset-gpios: See panel-common.txt -- enable-gpios: See simple-panel.txt -- backlight: See simple-panel.txt - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/dlc,dlc0700yzg-1.yaml 2019-08-31 15:01:11.823736165 -0500 @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/dlc,dlc0700yzg-1.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: DLC Display Co. DLC0700YZG-1 7.0" WSVGA TFT LCD panel + +maintainers: + - Philipp Zabel + - Thierry Reding + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: dlc,dlc0700yzg-1 + + reset-gpios: true + enable-gpios: true + backlight: true + port: true + +additionalProperties: false + +required: + - compatible + - power-supply + +... 
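
The dlc,dlc0700yzg-1 schema above likewise drops the example from the old text binding. A minimal conforming node, again with placeholder phandles; note that the schema requires power-supply even though it is not spelled out in its properties list:

panel {
	compatible = "dlc,dlc0700yzg-1";
	power-supply = <&vcc_3v3_lcd>;			/* placeholder regulator phandle */
	reset-gpios = <&gpio3 4 GPIO_ACTIVE_LOW>;	/* placeholder GPIO */
	backlight = <&backlight>;
};
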
diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/edt,et-series.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/edt,et-series.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/edt,et-series.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/edt,et-series.txt 2019-08-31 15:01:11.823736165 -0500 @@ -40,7 +40,7 @@ | Identifier | compatbile | description | +=================+=====================+=====================================+ | ETM0700G0DH6 | edt,etm070080dh6 | WVGA TFT Display with capacitive | -| | | Touchscreen | +| | edt,etm0700g0dh6 | Touchscreen | +-----------------+---------------------+-------------------------------------+ | ETM0700G0BDH6 | edt,etm070080bdh6 | Same as ETM0700G0DH6 but with | | | | inverted pixel clock. | diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/giantplus,gpm940b0.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/giantplus,gpm940b0.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/giantplus,gpm940b0.txt 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/giantplus,gpm940b0.txt 2019-08-31 15:01:11.823736165 -0500 @@ -0,0 +1,12 @@ +GiantPlus 3.0" (320x240 pixels) 24-bit TFT LCD panel + +Required properties: +- compatible: should be "giantplus,gpm940b0" +- power-supply: as specified in the base binding + +Optional properties: +- backlight: as specified in the base binding +- enable-gpios: as specified in the base binding + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,7 +0,0 @@ -Innolux Corporation 10.1" EE101IA-01D WXGA (1280x800) LVDS panel - -Required properties: -- compatible: should be "innolux,ee101ia-01d" - -This binding is compatible with the lvds-panel binding, which is specified -in panel-lvds.txt in this directory. 
diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/innolux,ee101ia-01d.yaml 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/innolux,ee101ia-01d.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Innolux Corporation 10.1" EE101IA-01D WXGA (1280x800) LVDS panel + +maintainers: + - Heiko Stuebner + - Thierry Reding + +allOf: + - $ref: lvds.yaml# + +properties: + compatible: + items: + - const: innolux,ee101ia-01d + - {} # panel-lvds, but not listed here to avoid false select + + backlight: true + enable-gpios: true + power-supply: true + width-mm: true + height-mm: true + panel-timing: true + port: true + +additionalProperties: false +... diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.txt 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.txt 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,42 @@ +King Display KD035G6-54NT 3.5" (320x240 pixels) 24-bit TFT LCD panel + +Required properties: +- compatible: should be "kingdisplay,kd035g6-54nt" +- power-supply: See panel-common.txt +- reset-gpios: See panel-common.txt + +Optional properties: +- backlight: see panel-common.txt + +The generic bindings for the SPI slaves documented in [1] also apply. + +The device node can contain one 'port' child node with one child 'endpoint' node, according to the bindings defined in [2]. This node should describe panel's video bus. + +[1]: Documentation/devicetree/bindings/spi/spi-bus.txt +[2]: Documentation/devicetree/bindings/graph.txt + +Example: + +&spi { + panel@0 { + compatible = "kingdisplay,kd035g6-54nt"; + reg = <0>; + + spi-max-frequency = <3125000>; + spi-3wire; + spi-cs-high; + + reset-gpios = <&gpe 2 GPIO_ACTIVE_LOW>; + + backlight = <&backlight>; + power-supply = <&ldo6>; + + port { + panel_input: endpoint { + remote-endpoint = <&panel_output>; + }; + }; + }; +}; diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/lvds.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/lvds.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/lvds.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/lvds.yaml 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/lvds.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: LVDS Display Panel + +maintainers: + - Laurent Pinchart + - Thierry Reding + +description: |+ + LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple + incompatible data link layers have been used over time to transmit image data + to LVDS panels. This binding supports display panels compatible with the + following specifications. 
+ + [JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February + 1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA) + [LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National + Semiconductor + [VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video + Electronics Standards Association (VESA) + + Devices compatible with those specifications have been marketed under the + FPD-Link and FlatLink brands. + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + contains: + const: panel-lvds + description: + Shall contain "panel-lvds" in addition to a mandatory panel-specific + compatible string defined in individual panel bindings. The "panel-lvds" + value shall never be used on its own. + + data-mapping: + enum: + - jeida-18 + - jeida-24 + - vesa-24 + description: | + The color signals mapping order. + + LVDS data mappings are defined as follows. + + - "jeida-18" - 18-bit data mapping compatible with the [JEIDA], [LDI] and + [VESA] specifications. Data are transferred as follows on 3 LVDS lanes. + + Slot 0 1 2 3 4 5 6 + ________________ _________________ + Clock \_______________________/ + ______ ______ ______ ______ ______ ______ ______ + DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__>< + DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__>< + DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__>< + + - "jeida-24" - 24-bit data mapping compatible with the [JEIDA] and [LDI] + specifications. Data are transferred as follows on 4 LVDS lanes. + + Slot 0 1 2 3 4 5 6 + ________________ _________________ + Clock \_______________________/ + ______ ______ ______ ______ ______ ______ ______ + DATA0 ><__G2__><__R7__><__R6__><__R5__><__R4__><__R3__><__R2__>< + DATA1 ><__B3__><__B2__><__G7__><__G6__><__G5__><__G4__><__G3__>< + DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B7__><__B6__><__B5__><__B4__>< + DATA3 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__>< + + - "vesa-24" - 24-bit data mapping compatible with the [VESA] specification. + Data are transferred as follows on 4 LVDS lanes. + + Slot 0 1 2 3 4 5 6 + ________________ _________________ + Clock \_______________________/ + ______ ______ ______ ______ ______ ______ ______ + DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__>< + DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__>< + DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__>< + DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__>< + + Control signals are mapped as follows. + + CTL0: HSync + CTL1: VSync + CTL2: Data Enable + CTL3: 0 + + data-mirror: + type: boolean + description: + If set, reverse the bit order described in the data mappings above on all + data lanes, transmitting bits for slots 6 to 0 instead of 0 to 6. + +required: + - compatible + - data-mapping + - width-mm + - height-mm + - panel-timing + - port + +... diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,47 +0,0 @@ -Mitsubishi AA204XD12 LVDS Display Panel -======================================= - -The AA104XD12 is a 10.4" XGA TFT-LCD display panel. 
- -These DT bindings follow the LVDS panel bindings defined in panel-lvds.txt -with the following device-specific properties. - - -Required properties: - -- compatible: Shall contain "mitsubishi,aa121td01" and "panel-lvds", in that - order. -- vcc-supply: Reference to the regulator powering the panel VCC pins. - - -Example -------- - -panel { - compatible = "mitsubishi,aa104xd12", "panel-lvds"; - vcc-supply = <&vcc_3v3>; - - width-mm = <210>; - height-mm = <158>; - - data-mapping = "jeida-24"; - - panel-timing { - /* 1024x768 @65Hz */ - clock-frequency = <65000000>; - hactive = <1024>; - vactive = <768>; - hsync-len = <136>; - hfront-porch = <20>; - hback-porch = <160>; - vfront-porch = <3>; - vback-porch = <29>; - vsync-len = <6>; - }; - - port { - panel_in: endpoint { - remote-endpoint = <&lvds_encoder>; - }; - }; -}; diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa104xd12.yaml 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/mitsubishi,aa104xd12.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Mitsubishi AA104XD12 10.4" XGA LVDS Display Panel + +maintainers: + - Laurent Pinchart + - Thierry Reding + +allOf: + - $ref: lvds.yaml# + +properties: + compatible: + items: + - const: mitsubishi,aa104xd12 + - {} # panel-lvds, but not listed here to avoid false select + + vcc-supply: + description: Reference to the regulator powering the panel VCC pins. + + data-mapping: + const: jeida-24 + + width-mm: + const: 210 + + height-mm: + const: 158 + + panel-timing: true + port: true + +additionalProperties: false + +required: + - compatible + - vcc-supply + +examples: + - |+ + + panel { + compatible = "mitsubishi,aa104xd12", "panel-lvds"; + vcc-supply = <&vcc_3v3>; + + width-mm = <210>; + height-mm = <158>; + + data-mapping = "jeida-24"; + + panel-timing { + /* 1024x768 @65Hz */ + clock-frequency = <65000000>; + hactive = <1024>; + vactive = <768>; + hsync-len = <136>; + hfront-porch = <20>; + hback-porch = <160>; + vfront-porch = <3>; + vback-porch = <29>; + vsync-len = <6>; + }; + + port { + panel_in: endpoint { + remote-endpoint = <&lvds_encoder>; + }; + }; + }; + +... diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,47 +0,0 @@ -Mitsubishi AA121TD01 LVDS Display Panel -======================================= - -The AA121TD01 is a 12.1" WXGA TFT-LCD display panel. - -These DT bindings follow the LVDS panel bindings defined in panel-lvds.txt -with the following device-specific properties. - - -Required properties: - -- compatible: Shall contain "mitsubishi,aa121td01" and "panel-lvds", in that - order. -- vcc-supply: Reference to the regulator powering the panel VCC pins. 
- - -Example -------- - -panel { - compatible = "mitsubishi,aa121td01", "panel-lvds"; - vcc-supply = <&vcc_3v3>; - - width-mm = <261>; - height-mm = <163>; - - data-mapping = "jeida-24"; - - panel-timing { - /* 1280x800 @60Hz */ - clock-frequency = <71000000>; - hactive = <1280>; - vactive = <800>; - hsync-len = <70>; - hfront-porch = <20>; - hback-porch = <70>; - vsync-len = <5>; - vfront-porch = <3>; - vback-porch = <15>; - }; - - port { - panel_in: endpoint { - remote-endpoint = <&lvds_encoder>; - }; - }; -}; diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/mitsubishi,aa121td01.yaml 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/mitsubishi,aa121td01.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Mitsubishi AA121TD01 12.1" WXGA LVDS Display Panel + +maintainers: + - Laurent Pinchart + - Thierry Reding + +allOf: + - $ref: lvds.yaml# + +properties: + compatible: + items: + - const: mitsubishi,aa121td01 + - {} # panel-lvds, but not listed here to avoid false select + + vcc-supply: + description: Reference to the regulator powering the panel VCC pins. + + data-mapping: + const: jeida-24 + + width-mm: + const: 261 + + height-mm: + const: 163 + + panel-timing: true + port: true + +additionalProperties: false + +required: + - compatible + - vcc-supply + +examples: + - |+ + panel { + compatible = "mitsubishi,aa121td01", "panel-lvds"; + vcc-supply = <&vcc_3v3>; + + width-mm = <261>; + height-mm = <163>; + + data-mapping = "jeida-24"; + + panel-timing { + /* 1280x800 @60Hz */ + clock-frequency = <71000000>; + hactive = <1280>; + vactive = <800>; + hsync-len = <70>; + hfront-porch = <20>; + hback-porch = <70>; + vsync-len = <5>; + vfront-porch = <3>; + vback-porch = <15>; + }; + + port { + panel_in: endpoint { + remote-endpoint = <&lvds_encoder>; + }; + }; + }; + +... diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m05dtc.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m05dtc.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m05dtc.txt 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m05dtc.txt 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,12 @@ +OrtusTech COM37H3M05DTC Blanview 3.7" VGA portrait TFT-LCD panel + +Required properties: +- compatible: should be "ortustech,com37h3m05dtc" + +Optional properties: +- enable-gpios: GPIO pin to enable or disable the panel +- backlight: phandle of the backlight device attached to the panel +- power-supply: phandle of the regulator that provides the supply voltage + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. 
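
Neither Mitsubishi example exercises the data-mirror flag defined in lvds.yaml earlier in this patch. A sketch of a mirrored-lane LVDS panel node; the device compatible is a made-up placeholder, since "panel-lvds" must never be used on its own:

panel {
	/* "vendor,example-lvds-panel" stands in for a real device-specific
	   compatible string defined by an individual panel binding. */
	compatible = "vendor,example-lvds-panel", "panel-lvds";

	width-mm = <210>;
	height-mm = <158>;

	data-mapping = "vesa-24";
	/* Transmit bits for slots 6 to 0 instead of 0 to 6 on all lanes. */
	data-mirror;

	panel-timing {
		/* 1024x768 @65Hz, reusing the AA104XD12 timings above */
		clock-frequency = <65000000>;
		hactive = <1024>;
		vactive = <768>;
		hsync-len = <136>;
		hfront-porch = <20>;
		hback-porch = <160>;
		vfront-porch = <3>;
		vback-porch = <29>;
		vsync-len = <6>;
	};

	port {
		panel_in: endpoint {
			remote-endpoint = <&lvds_encoder>;
		};
	};
};
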
diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m99dtc.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m99dtc.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m99dtc.txt 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/ortustech,com37h3m99dtc.txt 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,12 @@ +OrtusTech COM37H3M99DTC Blanview 3.7" VGA portrait TFT-LCD panel + +Required properties: +- compatible: should be "ortustech,com37h3m99dtc" + +Optional properties: +- enable-gpios: GPIO pin to enable or disable the panel +- backlight: phandle of the backlight device attached to the panel +- power-supply: phandle of the regulator that provides the supply voltage + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel-common.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel-common.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel-common.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel-common.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,101 +0,0 @@ -Common Properties for Display Panel -=================================== - -This document defines device tree properties common to several classes of -display panels. It doesn't constitue a device tree binding specification by -itself but is meant to be referenced by device tree bindings. - -When referenced from panel device tree bindings the properties defined in this -document are defined as follows. The panel device tree bindings are -responsible for defining whether each property is required or optional. - - -Descriptive Properties ----------------------- - -- width-mm, -- height-mm: The width-mm and height-mm specify the width and height of the - physical area where images are displayed. These properties are expressed in - millimeters and rounded to the closest unit. - -- label: The label property specifies a symbolic name for the panel as a - string suitable for use by humans. It typically contains a name inscribed on - the system (e.g. as an affixed label) or specified in the system's - documentation (e.g. in the user's manual). - - If no such name exists, and unless the property is mandatory according to - device tree bindings, it shall rather be omitted than constructed of - non-descriptive information. For instance an LCD panel in a system that - contains a single panel shall not be labelled "LCD" if that name is not - inscribed on the system or used in a descriptive fashion in system - documentation. - - -Display Timings ---------------- - -- panel-timing: Most display panels are restricted to a single resolution and - require specific display timings. The panel-timing subnode expresses those - timings as specified in the timing subnode section of the display timing - bindings defined in - Documentation/devicetree/bindings/display/panel/display-timing.txt. - - -Connectivity ------------- - -- ports: Panels receive video data through one or multiple connections. While - the nature of those connections is specific to the panel type, the - connectivity is expressed in a standard fashion using ports as specified in - the device graph bindings defined in - Documentation/devicetree/bindings/graph.txt. 
- -- ddc-i2c-bus: Some panels expose EDID information through an I2C-compatible - bus such as DDC2 or E-DDC. For such panels the ddc-i2c-bus contains a - phandle to the system I2C controller connected to that bus. - - -Control I/Os ------------- - -Many display panels can be controlled through pins driven by GPIOs. The nature -and timing of those control signals are device-specific and left for panel -device tree bindings to specify. The following GPIO specifiers can however be -used for panels that implement compatible control signals. - -- enable-gpios: Specifier for a GPIO connected to the panel enable control - signal. The enable signal is active high and enables operation of the panel. - This property can also be used for panels implementing an active low power - down signal, which is a negated version of the enable signal. Active low - enable signals (or active high power down signals) can be supported by - inverting the GPIO specifier polarity flag. - - Note that the enable signal control panel operation only and must not be - confused with a backlight enable signal. - -- reset-gpios: Specifier for a GPIO coonnected to the panel reset control - signal. The reset signal is active low and resets the panel internal logic - while active. Active high reset signals can be supported by inverting the - GPIO specifier polarity flag. - -Power ------ - -- power-supply: display panels require power to be supplied. While several - panels need more than one power supply with panel-specific constraints - governing the order and timings of the power supplies, in many cases a single - power supply is sufficient, either because the panel has a single power rail, - or because all its power rails can be driven by the same supply. In that case - the power-supply property specifies the supply powering the panel as a phandle - to a regulator. - -Backlight ---------- - -Most display panels include a backlight. Some of them also include a backlight -controller exposed through a control bus such as I2C or DSI. Others expose -backlight control through GPIO, PWM or other signals connected to an external -backlight controller. - -- backlight: For panels whose backlight is controlled by an external backlight - controller, this property contains a phandle that references the controller. diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel-common.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel-common.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel-common.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel-common.yaml 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,149 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/panel-common.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Common Properties for Display Panels + +maintainers: + - Thierry Reding + - Laurent Pinchart + +description: | + This document defines device tree properties common to several classes of + display panels. It doesn't constitute a device tree binding specification by + itself but is meant to be referenced by device tree bindings. + + When referenced from panel device tree bindings the properties defined in this + document are defined as follows. The panel device tree bindings are + responsible for defining whether each property is required or optional. 
+ +properties: + # Descriptive Properties + width-mm: + description: + Specifies the width of the physical area where images are displayed. This + property is expressed in millimeters and rounded to the closest unit. + + height-mm: + description: + Specifies the height of the physical area where images are displayed. This + property is expressed in millimeters and rounded to the closest unit. + + label: + description: | + The label property specifies a symbolic name for the panel as a + string suitable for use by humans. It typically contains a name inscribed + on the system (e.g. as an affixed label) or specified in the system's + documentation (e.g. in the user's manual). + + If no such name exists, and unless the property is mandatory according to + device tree bindings, it shall rather be omitted than constructed of + non-descriptive information. For instance an LCD panel in a system that + contains a single panel shall not be labelled "LCD" if that name is not + inscribed on the system or used in a descriptive fashion in system + documentation. + + rotation: + description: + Display rotation in degrees counter clockwise (0,90,180,270) + allOf: + - $ref: /schemas/types.yaml#/definitions/uint32 + - enum: [ 0, 90, 180, 270 ] + + # Display Timings + panel-timing: + type: object + description: + Most display panels are restricted to a single resolution and + require specific display timings. The panel-timing subnode expresses those + timings as specified in the timing subnode section of the display timing + bindings defined in + Documentation/devicetree/bindings/display/panel/display-timing.txt. + + # Connectivity + port: + type: object + + ports: + type: object + description: + Panels receive video data through one or multiple connections. While + the nature of those connections is specific to the panel type, the + connectivity is expressed in a standard fashion using ports as specified + in the device graph bindings defined in + Documentation/devicetree/bindings/graph.txt. + + ddc-i2c-bus: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Some panels expose EDID information through an I2C-compatible + bus such as DDC2 or E-DDC. For such panels the ddc-i2c-bus contains a + phandle to the system I2C controller connected to that bus. + + no-hpd: + type: boolean + description: + This panel is supposed to communicate that it's ready via HPD + (hot plug detect) signal, but the signal isn't hooked up so we should + hardcode the max delay from the panel spec when powering up the panel. + + # Control I/Os + + # Many display panels can be controlled through pins driven by GPIOs. The nature + # and timing of those control signals are device-specific and left for panel + # device tree bindings to specify. The following GPIO specifiers can however be + # used for panels that implement compatible control signals. + + enable-gpios: + maxItems: 1 + description: | + Specifier for a GPIO connected to the panel enable control signal. The + enable signal is active high and enables operation of the panel. This + property can also be used for panels implementing an active low power down + signal, which is a negated version of the enable signal. Active low enable + signals (or active high power down signals) can be supported by inverting + the GPIO specifier polarity flag. + + Note that the enable signal controls panel operation only and must not be + confused with a backlight enable signal. 
+ + reset-gpios: + maxItems: 1 + description: + Specifier for a GPIO connected to the panel reset control signal. + The reset signal is active low and resets the panel internal logic + while active. Active high reset signals can be supported by inverting the + GPIO specifier polarity flag. + + # Power + power-supply: + description: + Display panels require power to be supplied. While several panels need + more than one power supply with panel-specific constraints governing the + order and timings of the power supplies, in many cases a single power + supply is sufficient, either because the panel has a single power rail, or + because all its power rails can be driven by the same supply. In that case + the power-supply property specifies the supply powering the panel as a + phandle to a regulator. + + # Backlight + + # Most display panels include a backlight. Some of them also include a backlight + # controller exposed through a control bus such as I2C or DSI. Others expose + # backlight control through GPIO, PWM or other signals connected to an external + # backlight controller. + + backlight: + $ref: /schemas/types.yaml#/definitions/phandle + description: + For panels whose backlight is controlled by an external backlight + controller, this property contains a phandle that references the + controller. + +dependencies: + width-mm: [ height-mm ] + height-mm: [ width-mm ] + +... diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel-lvds.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel-lvds.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel-lvds.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel-lvds.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,121 +0,0 @@ -LVDS Display Panel -================== - -LVDS is a physical layer specification defined in ANSI/TIA/EIA-644-A. Multiple -incompatible data link layers have been used over time to transmit image data -to LVDS panels. This bindings supports display panels compatible with the -following specifications. - -[JEIDA] "Digital Interface Standards for Monitor", JEIDA-59-1999, February -1999 (Version 1.0), Japan Electronic Industry Development Association (JEIDA) -[LDI] "Open LVDS Display Interface", May 1999 (Version 0.95), National -Semiconductor -[VESA] "VESA Notebook Panel Standard", October 2007 (Version 1.0), Video -Electronics Standards Association (VESA) - -Device compatible with those specifications have been marketed under the -FPD-Link and FlatLink brands. - - -Required properties: - -- compatible: Shall contain "panel-lvds" in addition to a mandatory - panel-specific compatible string defined in individual panel bindings. The - "panel-lvds" value shall never be used on its own. -- width-mm: See panel-common.txt. -- height-mm: See panel-common.txt. -- data-mapping: The color signals mapping order, "jeida-18", "jeida-24" - or "vesa-24". - -Optional properties: - -- label: See panel-common.txt. -- gpios: See panel-common.txt. -- backlight: See panel-common.txt. -- power-supply: See panel-common.txt. -- data-mirror: If set, reverse the bit order described in the data mappings - below on all data lanes, transmitting bits for slots 6 to 0 instead of - 0 to 6. - -Required nodes: - -- panel-timing: See panel-common.txt. -- ports: See panel-common.txt. These bindings require a single port subnode - corresponding to the panel LVDS input. - - -LVDS data mappings are defined as follows. 
- -- "jeida-18" - 18-bit data mapping compatible with the [JEIDA], [LDI] and - [VESA] specifications. Data are transferred as follows on 3 LVDS lanes. - -Slot 0 1 2 3 4 5 6 - ________________ _________________ -Clock \_______________________/ - ______ ______ ______ ______ ______ ______ ______ -DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__>< -DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__>< -DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__>< - -- "jeida-24" - 24-bit data mapping compatible with the [DSIM] and [LDI] - specifications. Data are transferred as follows on 4 LVDS lanes. - -Slot 0 1 2 3 4 5 6 - ________________ _________________ -Clock \_______________________/ - ______ ______ ______ ______ ______ ______ ______ -DATA0 ><__G2__><__R7__><__R6__><__R5__><__R4__><__R3__><__R2__>< -DATA1 ><__B3__><__B2__><__G7__><__G6__><__G5__><__G4__><__G3__>< -DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B7__><__B6__><__B5__><__B4__>< -DATA3 ><_CTL3_><__B1__><__B0__><__G1__><__G0__><__R1__><__R0__>< - -- "vesa-24" - 24-bit data mapping compatible with the [VESA] specification. - Data are transferred as follows on 4 LVDS lanes. - -Slot 0 1 2 3 4 5 6 - ________________ _________________ -Clock \_______________________/ - ______ ______ ______ ______ ______ ______ ______ -DATA0 ><__G0__><__R5__><__R4__><__R3__><__R2__><__R1__><__R0__>< -DATA1 ><__B1__><__B0__><__G5__><__G4__><__G3__><__G2__><__G1__>< -DATA2 ><_CTL2_><_CTL1_><_CTL0_><__B5__><__B4__><__B3__><__B2__>< -DATA3 ><_CTL3_><__B7__><__B6__><__G7__><__G6__><__R7__><__R6__>< - -Control signals are mapped as follows. - -CTL0: HSync -CTL1: VSync -CTL2: Data Enable -CTL3: 0 - - -Example -------- - -panel { - compatible = "mitsubishi,aa121td01", "panel-lvds"; - - width-mm = <261>; - height-mm = <163>; - - data-mapping = "jeida-24"; - - panel-timing { - /* 1280x800 @60Hz */ - clock-frequency = <71000000>; - hactive = <1280>; - vactive = <800>; - hsync-len = <70>; - hfront-porch = <20>; - hback-porch = <70>; - vsync-len = <5>; - vfront-porch = <3>; - vback-porch = <15>; - }; - - port { - panel_in: endpoint { - remote-endpoint = <&lvds_encoder>; - }; - }; -}; diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/panel.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/panel.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,4 +0,0 @@ -Common display properties -------------------------- - -- rotation: Display rotation in degrees counter clockwise (0,90,180,270) diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,14 +0,0 @@ -PDA 91-00156-A0 5.0" WVGA TFT LCD panel - -Required properties: -- compatible: should be "pda,91-00156-a0" -- power-supply: this panel requires a single power supply. A phandle to a -regulator needs to be specified here. Compatible with panel-common binding which -is specified in the panel-common.txt in this directory. 
-- backlight: this panel's backlight is controlled by an external backlight -controller. A phandle to this controller needs to be specified here. -Compatible with panel-common binding which is specified in the panel-common.txt -in this directory. - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/pda,91-00156-a0.yaml 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,31 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/pda,91-00156-a0.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: PDA 91-00156-A0 5.0" WVGA TFT LCD panel + +maintainers: + - Cristian Birsan + - Thierry Reding + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: pda,91-00156-a0 + + power-supply: true + backlight: true + port: true + +additionalProperties: false + +required: + - compatible + - power-supply + - backlight + +... diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,49 +0,0 @@ -This binding covers the official 7" (800x480) Raspberry Pi touchscreen -panel. - -This DSI panel contains: - -- TC358762 DSI->DPI bridge -- Atmel microcontroller on I2C for power sequencing the DSI bridge and - controlling backlight -- Touchscreen controller on I2C for touch input - -and this binding covers the DSI display parts but not its touch input. 
- -Required properties: -- compatible: Must be "raspberrypi,7inch-touchscreen-panel" -- reg: Must be "45" -- port: See panel-common.txt - -Example: - -dsi1: dsi@7e700000 { - #address-cells = <1>; - #size-cells = <0>; - <...> - - port { - dsi_out_port: endpoint { - remote-endpoint = <&panel_dsi_port>; - }; - }; -}; - -i2c_dsi: i2c { - compatible = "i2c-gpio"; - #address-cells = <1>; - #size-cells = <0>; - gpios = <&gpio 28 0 - &gpio 29 0>; - - lcd@45 { - compatible = "raspberrypi,7inch-touchscreen-panel"; - reg = <0x45>; - - port { - panel_dsi_port: endpoint { - remote-endpoint = <&dsi_out_port>; - }; - }; - }; -}; diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/raspberrypi,7inch-touchscreen.yaml 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,71 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/raspberrypi,7inch-touchscreen.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: The official 7" (800x480) Raspberry Pi touchscreen + +maintainers: + - Eric Anholt + - Thierry Reding + +description: |+ + This DSI panel contains: + + - TC358762 DSI->DPI bridge + - Atmel microcontroller on I2C for power sequencing the DSI bridge and + controlling backlight + - Touchscreen controller on I2C for touch input + + and this binding covers the DSI display parts but not its touch input. + +properties: + compatible: + const: raspberrypi,7inch-touchscreen-panel + + reg: + const: 0x45 + + port: true + +required: + - compatible + - reg + - port + +additionalProperties: false + +examples: + - |+ + dsi1: dsi { + #address-cells = <1>; + #size-cells = <0>; + + port { + dsi_out_port: endpoint { + remote-endpoint = <&panel_dsi_port>; + }; + }; + }; + + i2c_dsi: i2c { + compatible = "i2c-gpio"; + #address-cells = <1>; + #size-cells = <0>; + scl-gpios = <&gpio 28 0>; + sda-gpios = <&gpio 29 0>; + + lcd@45 { + compatible = "raspberrypi,7inch-touchscreen-panel"; + reg = <0x45>; + + port { + panel_dsi_port: endpoint { + remote-endpoint = <&dsi_out_port>; + }; + }; + }; + }; + +... 
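The compatible string and reg value in the schema above are exactly what the
I2C core matches on when binding a client driver to the panel's
microcontroller. As a hedged sketch of the consumer side, with driver and
function names made up and only the matching mechanics taken from the binding:

	#include <linux/i2c.h>
	#include <linux/module.h>
	#include <linux/of.h>

	static int example_rpi_panel_probe(struct i2c_client *client,
					   const struct i2c_device_id *id)
	{
		/* client->addr is 0x45 here, from the node's reg property. */
		dev_info(&client->dev, "panel MCU at 0x%02x\n", client->addr);
		return 0;
	}

	static const struct of_device_id example_rpi_panel_of_match[] = {
		{ .compatible = "raspberrypi,7inch-touchscreen-panel" },
		{ /* sentinel */ }
	};
	MODULE_DEVICE_TABLE(of, example_rpi_panel_of_match);

	static struct i2c_driver example_rpi_panel_driver = {
		.driver = {
			.name = "example-rpi-panel",
			.of_match_table = example_rpi_panel_of_match,
		},
		.probe = example_rpi_panel_probe,
	};
	module_i2c_driver(example_rpi_panel_driver);
	MODULE_LICENSE("GPL");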
diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/raydium,rm67191.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/raydium,rm67191.txt
--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/raydium,rm67191.txt 1969-12-31 18:00:00.000000000 -0600
+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/raydium,rm67191.txt 2019-08-31 15:01:11.824736165 -0500
@@ -0,0 +1,41 @@
+Raydium RM67191 OLED LCD panel with MIPI-DSI protocol
+
+Required properties:
+- compatible: "raydium,rm67191"
+- reg: virtual channel for MIPI-DSI protocol
+	must be <0>
+- dsi-lanes: number of DSI lanes to be used
+	must be <3> or <4>
+- port: input port node with endpoint definition as
+	defined in Documentation/devicetree/bindings/graph.txt;
+	the input port should be connected to a MIPI-DSI device
+	driver
+
+Optional properties:
+- reset-gpios: a GPIO spec for the RST_B GPIO pin
+- v3p3-supply: phandle to 3.3V regulator that powers the VDD_3V3 pin
+- v1p8-supply: phandle to 1.8V regulator that powers the VDD_1V8 pin
+- width-mm: see panel-common.txt
+- height-mm: see panel-common.txt
+- video-mode: 0 - burst-mode
+	      1 - non-burst with sync event
+	      2 - non-burst with sync pulse
+
+Example:
+
+	panel@0 {
+		compatible = "raydium,rm67191";
+		reg = <0>;
+		pinctrl-0 = <&pinctrl_mipi_dsi_0_1_en>;
+		pinctrl-names = "default";
+		reset-gpios = <&gpio1 7 GPIO_ACTIVE_LOW>;
+		dsi-lanes = <4>;
+		width-mm = <68>;
+		height-mm = <121>;
+
+		port {
+			panel_in: endpoint {
+				remote-endpoint = <&mipi_out>;
+			};
+		};
+	};
diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.txt
--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.txt 2019-08-25 14:01:23.000000000 -0500
+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/rocktech,jh057n00900.txt 2019-08-31 15:01:11.824736165 -0500
@@ -5,6 +5,9 @@
 - reg: DSI virtual channel of the peripheral
 - reset-gpios: panel reset gpio
 - backlight: phandle of the backlight device attached to the panel
+- vcc-supply: phandle of the regulator that provides the vcc supply voltage.
+- iovcc-supply: phandle of the regulator that provides the iovcc supply
+  voltage.
 
 Example:
 
@@ -14,5 +17,7 @@
 		reg = <0>;
 		backlight = <&backlight>;
 		reset-gpios = <&gpio3 13 GPIO_ACTIVE_LOW>;
+		vcc-supply = <&reg_2v8_p>;
+		iovcc-supply = <&reg_1v8_p>;
 	};
 };
diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.txt
--- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.txt 2019-08-25 14:01:23.000000000 -0500
+++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.txt 1969-12-31 18:00:00.000000000 -0600
@@ -1,41 +0,0 @@
-Solomon Goldentek Display GKTW70SDAE4SE LVDS Display Panel
-==========================================================
-
-The GKTW70SDAE4SE is a 7" WVGA TFT-LCD display panel.
-
-These DT bindings follow the LVDS panel bindings defined in panel-lvds.txt
-with the following device-specific properties.
-
-Required properties:
-
-- compatible: Shall contain "sgd,gktw70sdae4se" and "panel-lvds", in that order.
- -Example -------- - -panel { - compatible = "sgd,gktw70sdae4se", "panel-lvds"; - - width-mm = <153>; - height-mm = <86>; - - data-mapping = "jeida-18"; - - panel-timing { - clock-frequency = <32000000>; - hactive = <800>; - vactive = <480>; - hback-porch = <39>; - hfront-porch = <39>; - vback-porch = <29>; - vfront-porch = <13>; - hsync-len = <47>; - vsync-len = <2>; - }; - - port { - panel_in: endpoint { - remote-endpoint = <&lvds_encoder>; - }; - }; -}; diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sgd,gktw70sdae4se.yaml 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/sgd,gktw70sdae4se.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Solomon Goldentek Display GKTW70SDAE4SE 7" WVGA LVDS Display Panel + +maintainers: + - Neil Armstrong + - Thierry Reding + +allOf: + - $ref: lvds.yaml# + +properties: + compatible: + items: + - const: sgd,gktw70sdae4se + - {} # panel-lvds, but not listed here to avoid false select + + data-mapping: + const: jeida-18 + + width-mm: + const: 153 + + height-mm: + const: 86 + + panel-timing: true + port: true + +additionalProperties: false + +required: + - compatible + +examples: + - |+ + panel { + compatible = "sgd,gktw70sdae4se", "panel-lvds"; + + width-mm = <153>; + height-mm = <86>; + + data-mapping = "jeida-18"; + + panel-timing { + clock-frequency = <32000000>; + hactive = <800>; + vactive = <480>; + hback-porch = <39>; + hfront-porch = <39>; + vback-porch = <29>; + vfront-porch = <13>; + hsync-len = <47>; + vsync-len = <2>; + }; + + port { + panel_in: endpoint { + remote-endpoint = <&lvds_encoder>; + }; + }; + }; + +... diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.txt 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sharp,ld-d5116z01b.txt 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,26 @@ +Sharp LD-D5116Z01B 12.3" WUXGA+ eDP panel + +Required properties: +- compatible: should be "sharp,ld-d5116z01b" +- power-supply: regulator to provide the VCC supply voltage (3.3 volts) + +This binding is compatible with the simple-panel binding. + +The device node can contain one 'port' child node with one child +'endpoint' node, according to the bindings defined in [1]. This +node should describe panel's video bus. 
+ +[1]: Documentation/devicetree/bindings/media/video-interfaces.txt + +Example: + + panel: panel { + compatible = "sharp,ld-d5116z01b"; + power-supply = <&vlcd_3v3>; + + port { + panel_ep: endpoint { + remote-endpoint = <&bridge_out_ep>; + }; + }; + }; diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sharp,lq070y3dg3b.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sharp,lq070y3dg3b.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sharp,lq070y3dg3b.txt 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sharp,lq070y3dg3b.txt 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,12 @@ +Sharp LQ070Y3DG3B 7.0" WVGA landscape TFT LCD panel + +Required properties: +- compatible: should be "sharp,lq070y3dg3b" + +Optional properties: +- enable-gpios: GPIO pin to enable or disable the panel +- backlight: phandle of the backlight device attached to the panel +- power-supply: phandle of the regulator that provides the supply voltage + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sharp,ls020b1dd01d.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sharp,ls020b1dd01d.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/sharp,ls020b1dd01d.txt 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/sharp,ls020b1dd01d.txt 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,12 @@ +Sharp 2.0" (240x160 pixels) 16-bit TFT LCD panel + +Required properties: +- compatible: should be "sharp,ls020b1dd01d" +- power-supply: as specified in the base binding + +Optional properties: +- backlight: as specified in the base binding +- enable-gpios: as specified in the base binding + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/simple-panel.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/simple-panel.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/simple-panel.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/simple-panel.txt 2019-08-31 15:01:11.824736165 -0500 @@ -1,28 +1 @@ -Simple display panel -==================== - -panel node ----------- - -Required properties: -- power-supply: See panel-common.txt - -Optional properties: -- ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing -- enable-gpios: GPIO pin to enable or disable the panel -- backlight: phandle of the backlight device attached to the panel -- no-hpd: This panel is supposed to communicate that it's ready via HPD - (hot plug detect) signal, but the signal isn't hooked up so we should - hardcode the max delay from the panel spec when powering up the panel. - -Example: - - panel: panel { - compatible = "cptt,claa101wb01"; - ddc-i2c-bus = <&panelddc>; - - power-supply = <&vdd_pnl_reg>; - enable-gpios = <&gpio 90 0>; - - backlight = <&backlight>; - }; +See panel-common.yaml in this directory. 
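The Sharp bindings above all defer to the simple-panel/panel-common base
binding for power-supply, backlight and enable-gpios. On the driver side those
base properties resolve to ordinary kernel resources; the fragment below is a
rough sketch under that assumption, with a hypothetical probe function name
but the standard regulator and backlight consumer APIs:

	#include <linux/backlight.h>
	#include <linux/regulator/consumer.h>

	static int example_base_binding_probe(struct device *dev)
	{
		struct backlight_device *backlight;
		struct regulator *supply;
		int err;

		/* power-supply -> regulator with consumer name "power" */
		supply = devm_regulator_get(dev, "power");
		if (IS_ERR(supply))
			return PTR_ERR(supply);

		/* backlight -> the backlight device behind the phandle,
		 * or NULL when the property is absent */
		backlight = devm_of_find_backlight(dev);
		if (IS_ERR(backlight))
			return PTR_ERR(backlight);

		err = regulator_enable(supply);
		if (err < 0)
			return err;

		/* Safe no-op when backlight is NULL. */
		return backlight_enable(backlight);
	}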
diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,15 +0,0 @@ -TFC S9700RTWV43TR-01B 7" Three Five Corp 800x480 LCD panel with -resistive touch - -The panel is found on TI AM335x-evm. - -Required properties: -- compatible: should be "tfc,s9700rtwv43tr-01b" -- power-supply: See panel-common.txt - -Optional properties: -- enable-gpios: GPIO pin to enable or disable the panel, if there is one -- backlight: phandle of the backlight device attached to the panel - -This binding is compatible with the simple-panel binding, which is specified -in simple-panel.txt in this directory. diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tfc,s9700rtwv43tr-01b.yaml 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/tfc,s9700rtwv43tr-01b.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: TFC S9700RTWV43TR-01B 7" Three Five Corp 800x480 LCD panel with resistive touch + +maintainers: + - Jyri Sarha + - Thierry Reding + +description: |+ + The panel is found on TI AM335x-evm. + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + const: tfc,s9700rtwv43tr-01b + + enable-gpios: true + backlight: true + port: true + +additionalProperties: false + +required: + - compatible + - power-supply + +... diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tpo,tpg110.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tpo,tpg110.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tpo,tpg110.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tpo,tpg110.txt 1969-12-31 18:00:00.000000000 -0600 @@ -1,70 +0,0 @@ -TPO TPG110 Panel -================ - -This panel driver is a component that acts as an intermediary -between an RGB output and a variety of panels. The panel -driver is strapped up in electronics to the desired resolution -and other properties, and has a control interface over 3WIRE -SPI. By talking to the TPG110 over SPI, the strapped properties -can be discovered and the hardware is therefore mostly -self-describing. - - +--------+ -SPI -> | TPO | -> physical display -RGB -> | TPG110 | - +--------+ - -If some electrical strap or alternate resolution is desired, -this can be set up by taking software control of the display -over the SPI interface. The interface can also adjust -for properties of the display such as gamma correction and -certain electrical driving levels. - -The TPG110 does not know the physical dimensions of the panel -connected, so this needs to be specified in the device tree. - -It requires a GPIO line for control of its reset line. 
- -The serial protocol has line names that resemble I2C but the -protocol is not I2C but 3WIRE SPI. - -Required properties: -- compatible : one of: - "ste,nomadik-nhk15-display", "tpo,tpg110" - "tpo,tpg110" -- grestb-gpios : panel reset GPIO -- width-mm : see display/panel/panel-common.txt -- height-mm : see display/panel/panel-common.txt - -The device needs to be a child of an SPI bus, see -spi/spi-bus.txt. The SPI child must set the following -properties: -- spi-3wire -- spi-max-frequency = <3000000>; -as these are characteristics of this device. - -The device node can contain one 'port' child node with one child -'endpoint' node, according to the bindings defined in -media/video-interfaces.txt. This node should describe panel's video bus. - -Example -------- - -panel: display@0 { - compatible = "tpo,tpg110"; - reg = <0>; - spi-3wire; - /* 320 ns min period ~= 3 MHz */ - spi-max-frequency = <3000000>; - /* Width and height from data sheet */ - width-mm = <116>; - height-mm = <87>; - grestb-gpios = <&foo_gpio 5 GPIO_ACTIVE_LOW>; - backlight = <&bl>; - - port { - nomadik_clcd_panel: endpoint { - remote-endpoint = <&foo>; - }; - }; -}; diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml --- linux-5.3-rc6/Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/panel/tpo,tpg110.yaml 2019-08-31 15:01:11.824736165 -0500 @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: GPL-2.0 +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/display/panel/tpo,tpg110.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: TPO TPG110 Panel + +maintainers: + - Linus Walleij + - Thierry Reding + +description: |+ + This panel driver is a component that acts as an intermediary + between an RGB output and a variety of panels. The panel + driver is strapped up in electronics to the desired resolution + and other properties, and has a control interface over 3WIRE + SPI. By talking to the TPG110 over SPI, the strapped properties + can be discovered and the hardware is therefore mostly + self-describing. + + +--------+ + SPI -> | TPO | -> physical display + RGB -> | TPG110 | + +--------+ + + If some electrical strap or alternate resolution is desired, + this can be set up by taking software control of the display + over the SPI interface. The interface can also adjust + for properties of the display such as gamma correction and + certain electrical driving levels. + + The TPG110 does not know the physical dimensions of the panel + connected, so this needs to be specified in the device tree. + + It requires a GPIO line for control of its reset line. + + The serial protocol has line names that resemble I2C but the + protocol is not I2C but 3WIRE SPI. 
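To make the self-describing flow above concrete, here is a sketch of what a
probe for such a strapped SPI panel does before any register access. The
probe body is illustrative only; the property names come from the schema
below, and the SPI core applies spi-3wire from the device node before probe
runs:

	#include <linux/err.h>
	#include <linux/gpio/consumer.h>
	#include <linux/property.h>
	#include <linux/spi/spi.h>

	static int example_tpg_probe(struct spi_device *spi)
	{
		struct device *dev = &spi->dev;
		struct gpio_desc *grestb;
		u32 width, height;
		int err;

		if (!(spi->mode & SPI_3WIRE))
			return -EINVAL;

		/* The panel cannot report its size; take it from the DT. */
		err = device_property_read_u32(dev, "width-mm", &width);
		if (!err)
			err = device_property_read_u32(dev, "height-mm", &height);
		if (err)
			return err;

		/* grestb-gpios: the active low reset line. */
		grestb = devm_gpiod_get(dev, "grestb", GPIOD_OUT_LOW);
		return PTR_ERR_OR_ZERO(grestb);
	}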
+ + +allOf: + - $ref: panel-common.yaml# + +properties: + compatible: + oneOf: + - items: + - enum: + - ste,nomadik-nhk15-display + - const: tpo,tpg110 + - const: tpo,tpg110 + + reg: true + + grestb-gpios: + maxItems: 1 + description: panel reset GPIO + + spi-3wire: true + + spi-max-frequency: + const: 3000000 + +required: + - compatible + - reg + - grestb-gpios + - width-mm + - height-mm + - spi-3wire + - spi-max-frequency + - port + +examples: + - |+ + spi { + #address-cells = <1>; + #size-cells = <0>; + + panel: display@0 { + compatible = "tpo,tpg110"; + reg = <0>; + spi-3wire; + /* 320 ns min period ~= 3 MHz */ + spi-max-frequency = <3000000>; + /* Width and height from data sheet */ + width-mm = <116>; + height-mm = <87>; + grestb-gpios = <&foo_gpio 5 1>; + backlight = <&bl>; + + port { + nomadik_clcd_panel: endpoint { + remote-endpoint = <&foo>; + }; + }; + }; + }; + +... diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/rockchip/rockchip-lvds.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/rockchip/rockchip-lvds.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/rockchip/rockchip-lvds.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/rockchip/rockchip-lvds.txt 2019-08-31 15:01:11.824736165 -0500 @@ -32,17 +32,6 @@ - video port 0 for the VOP input, the remote endpoint maybe vopb or vopl - video port 1 for either a panel or subsequent encoder -the lvds panel described by - Documentation/devicetree/bindings/display/panel/simple-panel.txt - -Panel required properties: -- ports for remote LVDS output - -Panel optional properties: -- data-mapping: should be "vesa-24","jeida-24" or "jeida-18". -This describes decribed by: - Documentation/devicetree/bindings/display/panel/panel-lvds.txt - Example: lvds_panel: lvds-panel { diff -Naur linux-5.3-rc6/Documentation/devicetree/bindings/display/ssd1307fb.txt linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/ssd1307fb.txt --- linux-5.3-rc6/Documentation/devicetree/bindings/display/ssd1307fb.txt 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/devicetree/bindings/display/ssd1307fb.txt 2019-08-31 15:01:11.824736165 -0500 @@ -27,6 +27,15 @@ - solomon,prechargep2: Length of precharge period (phase 2) in clock cycles. This needs to be the higher, the higher the capacitance of the OLED's pixels is + - solomon,dclk-div: Clock divisor 1 to 16 + - solomon,dclk-frq: Clock frequency 0 to 15, higher value means higher + frequency + - solomon,lookup-table: 8 bit value array of current drive pulse widths for + BANK0, and colors A, B, and C. Each value in range + of 31 to 63 for pulse widths of 32 to 64. Color D + is always width 64. + - solomon,area-color-enable: Display uses color mode + - solomon,low-power. 
Display runs in low power mode [0]: Documentation/devicetree/bindings/pwm/pwm.txt @@ -46,4 +55,5 @@ solomon,com-lrremap; solomon,com-invdir; solomon,com-offset = <32>; + solomon,lookup-table = /bits/ 8 <0x3f 0x3f 0x3f 0x3f>; }; diff -Naur linux-5.3-rc6/Documentation/gpu/drivers.rst linux-5.3-rc6-agd5fed/Documentation/gpu/drivers.rst --- linux-5.3-rc6/Documentation/gpu/drivers.rst 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/gpu/drivers.rst 2019-08-31 15:01:11.824736165 -0500 @@ -11,7 +11,6 @@ meson pl111 tegra - tinydrm tve200 v3d vc4 diff -Naur linux-5.3-rc6/Documentation/gpu/drm-kms-helpers.rst linux-5.3-rc6-agd5fed/Documentation/gpu/drm-kms-helpers.rst --- linux-5.3-rc6/Documentation/gpu/drm-kms-helpers.rst 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/gpu/drm-kms-helpers.rst 2019-08-31 15:01:11.824736165 -0500 @@ -263,6 +263,18 @@ drm_dp_mst_topology_put_port drm_dp_mst_get_mstb_malloc drm_dp_mst_put_mstb_malloc +MIPI DBI Helper Functions Reference +=================================== + +.. kernel-doc:: drivers/gpu/drm/drm_mipi_dbi.c + :doc: overview + +.. kernel-doc:: include/drm/drm_mipi_dbi.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/drm_mipi_dbi.c + :export: + MIPI DSI Helper Functions Reference =================================== diff -Naur linux-5.3-rc6/Documentation/gpu/drm-mm.rst linux-5.3-rc6-agd5fed/Documentation/gpu/drm-mm.rst --- linux-5.3-rc6/Documentation/gpu/drm-mm.rst 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/gpu/drm-mm.rst 2019-08-31 15:01:11.825736165 -0500 @@ -433,43 +433,11 @@ created for the OPTIMUS range of multi-gpu platforms. To userspace PRIME buffers are dma-buf based file descriptors. -Overview and Driver Interface ------------------------------ +Overview and Lifetime Rules +--------------------------- -Similar to GEM global names, PRIME file descriptors are also used to -share buffer objects across processes. They offer additional security: -as file descriptors must be explicitly sent over UNIX domain sockets to -be shared between applications, they can't be guessed like the globally -unique GEM names. - -Drivers that support the PRIME API must set the DRIVER_PRIME bit in the -struct :c:type:`struct drm_driver ` -driver_features field, and implement the prime_handle_to_fd and -prime_fd_to_handle operations. - -int (\*prime_handle_to_fd)(struct drm_device \*dev, struct drm_file -\*file_priv, uint32_t handle, uint32_t flags, int \*prime_fd); int -(\*prime_fd_to_handle)(struct drm_device \*dev, struct drm_file -\*file_priv, int prime_fd, uint32_t \*handle); Those two operations -convert a handle to a PRIME file descriptor and vice versa. Drivers must -use the kernel dma-buf buffer sharing framework to manage the PRIME file -descriptors. Similar to the mode setting API PRIME is agnostic to the -underlying buffer object manager, as long as handles are 32bit unsigned -integers. - -While non-GEM drivers must implement the operations themselves, GEM -drivers must use the :c:func:`drm_gem_prime_handle_to_fd()` and -:c:func:`drm_gem_prime_fd_to_handle()` helper functions. Those -helpers rely on the driver gem_prime_export and gem_prime_import -operations to create a dma-buf instance from a GEM object (dma-buf -exporter role) and to create a GEM object from a dma-buf instance -(dma-buf importer role). 
- -struct dma_buf \* (\*gem_prime_export)(struct drm_device \*dev, -struct drm_gem_object \*obj, int flags); struct drm_gem_object \* -(\*gem_prime_import)(struct drm_device \*dev, struct dma_buf -\*dma_buf); These two operations are mandatory for GEM drivers that -support PRIME. +.. kernel-doc:: drivers/gpu/drm/drm_prime.c + :doc: overview and lifetime rules PRIME Helper Functions ---------------------- diff -Naur linux-5.3-rc6/Documentation/gpu/i915.rst linux-5.3-rc6-agd5fed/Documentation/gpu/i915.rst --- linux-5.3-rc6/Documentation/gpu/i915.rst 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/gpu/i915.rst 2019-08-31 15:01:11.825736165 -0500 @@ -430,31 +430,31 @@ GuC === +Firmware Layout +------------------- + +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h + :doc: Firmware Layout + GuC-specific firmware loader ---------------------------- -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fw.c +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c :internal: GuC-based command submission ---------------------------- -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_submission.c +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c :doc: GuC-based command submission -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_submission.c +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c :internal: -GuC Firmware Layout -------------------- - -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fwif.h - :doc: GuC Firmware Layout - GuC Address Space ----------------- -.. kernel-doc:: drivers/gpu/drm/i915/intel_guc.c +.. kernel-doc:: drivers/gpu/drm/i915/gt/uc/intel_guc.c :doc: GuC Address Space Tracing diff -Naur linux-5.3-rc6/Documentation/gpu/introduction.rst linux-5.3-rc6-agd5fed/Documentation/gpu/introduction.rst --- linux-5.3-rc6/Documentation/gpu/introduction.rst 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/gpu/introduction.rst 2019-08-31 15:01:11.825736165 -0500 @@ -51,6 +51,22 @@ Also read the :ref:`guidelines for the kernel documentation at large `. +Documentation Requirements for kAPI +----------------------------------- + +All kernel APIs exported to other modules must be documented, including their +datastructures and at least a short introductory section explaining the overall +concepts. Documentation should be put into the code itself as kerneldoc comments +as much as reasonable. + +Do not blindly document everything, but document only what's relevant for driver +authors: Internal functions of drm.ko and definitely static functions should not +have formal kerneldoc comments. Use normal C comments if you feel like a comment +is warranted. You may use kerneldoc syntax in the comment, but it shall not +start with a /** kerneldoc marker. Similar for data structures, annotate +anything entirely private with ``/* private: */`` comments as per the +documentation guide. + Getting Started =============== diff -Naur linux-5.3-rc6/Documentation/gpu/tinydrm.rst linux-5.3-rc6-agd5fed/Documentation/gpu/tinydrm.rst --- linux-5.3-rc6/Documentation/gpu/tinydrm.rst 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/gpu/tinydrm.rst 1969-12-31 18:00:00.000000000 -0600 @@ -1,30 +0,0 @@ -============================ -drm/tinydrm Tiny DRM drivers -============================ - -tinydrm is a collection of DRM drivers that are so small they can fit in a -single source file. - -Helpers -======= - -.. kernel-doc:: include/drm/tinydrm/tinydrm-helpers.h - :internal: - -.. 
kernel-doc:: drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c - :export: - -.. kernel-doc:: drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c - :export: - -MIPI DBI Compatible Controllers -=============================== - -.. kernel-doc:: drivers/gpu/drm/tinydrm/mipi-dbi.c - :doc: overview - -.. kernel-doc:: include/drm/tinydrm/mipi-dbi.h - :internal: - -.. kernel-doc:: drivers/gpu/drm/tinydrm/mipi-dbi.c - :export: diff -Naur linux-5.3-rc6/Documentation/gpu/todo.rst linux-5.3-rc6-agd5fed/Documentation/gpu/todo.rst --- linux-5.3-rc6/Documentation/gpu/todo.rst 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/gpu/todo.rst 2019-08-31 15:01:11.825736165 -0500 @@ -162,7 +162,7 @@ A lot of drivers forward gem mmap calls to dma-buf mmap for imported buffers. And also a lot of them forward dma-buf mmap to the gem mmap implementations. -Would be great to refactor this all into a set of small common helpers. +There's drm_gem_prime_mmap() for this now, but still needs to be rolled out. Contact: Daniel Vetter @@ -196,15 +196,6 @@ Contact: Daniel Vetter, Noralf Tronnes -Remove the ->gem_prime_res_obj callback --------------------------------------------- - -The ->gem_prime_res_obj callback can be removed from drivers by using the -reservation_object in the drm_gem_object. It may also be possible to use the -generic drm_gem_reservation_object_wait helper for waiting for a bo. - -Contact: Daniel Vetter - idr_init_base() --------------- @@ -215,22 +206,13 @@ Contact: Daniel Vetter -Defaults for .gem_prime_import and export ------------------------------------------ - -Most drivers don't need to set drm_driver->gem_prime_import and -->gem_prime_export now that drm_gem_prime_import() and drm_gem_prime_export() -are the default. - struct drm_gem_object_funcs --------------------------- GEM objects can now have a function table instead of having the callbacks on the DRM driver struct. This is now the preferred way and drivers can be moved over. -DRM_GEM_CMA_VMAP_DRIVER_OPS, DRM_GEM_SHMEM_DRIVER_OPS already support this, but -DRM_GEM_VRAM_DRIVER_PRIME does not yet and needs to be aligned with the previous -two. We also need a 2nd version of the CMA define that doesn't require the +We also need a 2nd version of the CMA define that doesn't require the vmapping to be present (different hook for prime importing). Plus this needs to be rolled out to all drivers using their own implementations, too. @@ -317,19 +299,6 @@ Contact: Daniel Vetter -Add missing kerneldoc for exported functions --------------------------------------------- - -The DRM reference documentation is still lacking kerneldoc in a few areas. The -task would be to clean up interfaces like moving functions around between -files to better group them and improving the interfaces like dropping return -values for functions that never fail. Then write kerneldoc for all exported -functions and an overview section and integrate it all into the drm book. - -See https://dri.freedesktop.org/docs/drm/ for what's there already. - -Contact: Daniel Vetter - Make panic handling work ------------------------ @@ -393,6 +362,9 @@ this (together with the drm_minor->drm_device move) would allow us to remove debugfs_init. +- Drop the return code and error checking from all debugfs functions. Greg KH is + working on this already. 
+ Contact: Daniel Vetter KMS cleanups @@ -440,38 +412,21 @@ Contact: Daniel Vetter -Driver Specific -=============== +Backlight Refactoring +--------------------- -tinydrm -------- +Backlight drivers have a triple enable/disable state, which is a bit overkill. +Plan to fix this: -Tinydrm is the helper driver for really simple fb drivers. The goal is to make -those drivers as simple as possible, so lots of room for refactoring: +1. Roll out backlight_enable() and backlight_disable() helpers everywhere. This + has started already. +2. In all, only look at one of the three status bits set by the above helpers. +3. Remove the other two status bits. -- backlight helpers, probably best to put them into a new drm_backlight.c. - This is because drivers/video is de-facto unmaintained. We could also - move drivers/video/backlight to drivers/gpu/backlight and take it all - over within drm-misc, but that's more work. Backlight helpers require a fair - bit of reworking and refactoring. A simple example is the enabling of a backlight. - Tinydrm has helpers for this. It would be good if other drivers can also use the - helper. However, there are various cases we need to consider i.e different - drivers seem to have different ways of enabling/disabling a backlight. - We also need to consider the backlight drivers (like gpio_backlight). The situation - is further complicated by the fact that the backlight is tied to fbdev - via fb_notifier_callback() which has complicated logic. For further details, refer - to the following discussion thread: - https://groups.google.com/forum/#!topic/outreachy-kernel/8rBe30lwtdA - -- spi helpers, probably best put into spi core/helper code. Thierry said - the spi maintainer is fast&reactive, so shouldn't be a big issue. - -- extract the mipi-dbi helper (well, the non-tinydrm specific parts at - least) into a separate helper, like we have for mipi-dsi already. Or follow - one of the ideas for having a shared dsi/dbi helper, abstracting away the - transport details more. +Contact: Daniel Vetter -Contact: Noralf Trønnes, Daniel Vetter +Driver Specific +=============== AMD DC Display Driver --------------------- diff -Naur linux-5.3-rc6/Documentation/media/uapi/v4l/subdev-formats.rst linux-5.3-rc6-agd5fed/Documentation/media/uapi/v4l/subdev-formats.rst --- linux-5.3-rc6/Documentation/media/uapi/v4l/subdev-formats.rst 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/Documentation/media/uapi/v4l/subdev-formats.rst 2019-08-31 15:01:11.825736165 -0500 @@ -1305,6 +1305,113 @@ - g\ :sub:`6` - g\ :sub:`5` - g\ :sub:`4` + * .. _MEDIA-BUS-FMT-RGB888-3X8: + + - MEDIA_BUS_FMT_RGB888_3X8 + - 0x101c + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - r\ :sub:`7` + - r\ :sub:`6` + - r\ :sub:`5` + - r\ :sub:`4` + - r\ :sub:`3` + - r\ :sub:`2` + - r\ :sub:`1` + - r\ :sub:`0` + * - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - g\ :sub:`7` + - g\ :sub:`6` + - g\ :sub:`5` + - g\ :sub:`4` + - g\ :sub:`3` + - g\ :sub:`2` + - g\ :sub:`1` + - g\ :sub:`0` + * - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - b\ :sub:`7` + - b\ :sub:`6` + - b\ :sub:`5` + - b\ :sub:`4` + - b\ :sub:`3` + - b\ :sub:`2` + - b\ :sub:`1` + - b\ :sub:`0` * .. 
_MEDIA-BUS-FMT-ARGB888-1X32: - MEDIA_BUS_FMT_ARGB888_1X32 diff -Naur linux-5.3-rc6/drivers/dma-buf/dma-fence-chain.c linux-5.3-rc6-agd5fed/drivers/dma-buf/dma-fence-chain.c --- linux-5.3-rc6/drivers/dma-buf/dma-fence-chain.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/dma-buf/dma-fence-chain.c 2019-08-31 15:01:11.833736166 -0500 @@ -178,8 +178,30 @@ static void dma_fence_chain_release(struct dma_fence *fence) { struct dma_fence_chain *chain = to_dma_fence_chain(fence); + struct dma_fence *prev; + + /* Manually unlink the chain as much as possible to avoid recursion + * and potential stack overflow. + */ + while ((prev = rcu_dereference_protected(chain->prev, true))) { + struct dma_fence_chain *prev_chain; + + if (kref_read(&prev->refcount) > 1) + break; + + prev_chain = to_dma_fence_chain(prev); + if (!prev_chain) + break; + + /* No need for atomic operations since we hold the last + * reference to prev_chain. + */ + chain->prev = prev_chain->prev; + RCU_INIT_POINTER(prev_chain->prev, NULL); + dma_fence_put(prev); + } + dma_fence_put(prev); - dma_fence_put(rcu_dereference_protected(chain->prev, true)); dma_fence_put(chain->fence); dma_fence_free(fence); } diff -Naur linux-5.3-rc6/drivers/dma-buf/reservation.c linux-5.3-rc6-agd5fed/drivers/dma-buf/reservation.c --- linux-5.3-rc6/drivers/dma-buf/reservation.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/dma-buf/reservation.c 2019-08-31 15:01:11.838736166 -0500 @@ -56,6 +56,85 @@ EXPORT_SYMBOL(reservation_seqcount_string); /** + * reservation_object_list_alloc - allocate fence list + * @shared_max: number of fences we need space for + * + * Allocate a new reservation_object_list and make sure to correctly initialize + * shared_max. + */ +static struct reservation_object_list * +reservation_object_list_alloc(unsigned int shared_max) +{ + struct reservation_object_list *list; + + list = kmalloc(offsetof(typeof(*list), shared[shared_max]), GFP_KERNEL); + if (!list) + return NULL; + + list->shared_max = (ksize(list) - offsetof(typeof(*list), shared)) / + sizeof(*list->shared); + + return list; +} + +/** + * reservation_object_list_free - free fence list + * @list: list to free + * + * Free a reservation_object_list and make sure to drop all references. + */ +static void reservation_object_list_free(struct reservation_object_list *list) +{ + unsigned int i; + + if (!list) + return; + + for (i = 0; i < list->shared_count; ++i) + dma_fence_put(rcu_dereference_protected(list->shared[i], true)); + + kfree_rcu(list, rcu); +} + +/** + * reservation_object_init - initialize a reservation object + * @obj: the reservation object + */ +void reservation_object_init(struct reservation_object *obj) +{ + ww_mutex_init(&obj->lock, &reservation_ww_class); + + __seqcount_init(&obj->seq, reservation_seqcount_string, + &reservation_seqcount_class); + RCU_INIT_POINTER(obj->fence, NULL); + RCU_INIT_POINTER(obj->fence_excl, NULL); +} +EXPORT_SYMBOL(reservation_object_init); + +/** + * reservation_object_fini - destroys a reservation object + * @obj: the reservation object + */ +void reservation_object_fini(struct reservation_object *obj) +{ + struct reservation_object_list *fobj; + struct dma_fence *excl; + + /* + * This object should be dead and all references must have + * been released to it, so no need to be protected with rcu. 
+ */ + excl = rcu_dereference_protected(obj->fence_excl, 1); + if (excl) + dma_fence_put(excl); + + fobj = rcu_dereference_protected(obj->fence, 1); + reservation_object_list_free(fobj); + ww_mutex_destroy(&obj->lock); +} +EXPORT_SYMBOL(reservation_object_fini); + +/** * reservation_object_reserve_shared - Reserve space to add shared fences to * a reservation_object. * @obj: reservation object @@ -87,7 +166,7 @@ max = 4; } - new = kmalloc(offsetof(typeof(*new), shared[max]), GFP_KERNEL); + new = reservation_object_list_alloc(max); if (!new) return -ENOMEM; @@ -108,23 +187,22 @@ RCU_INIT_POINTER(new->shared[j++], fence); } new->shared_count = j; - new->shared_max = max; - preempt_disable(); - write_seqcount_begin(&obj->seq); /* - * RCU_INIT_POINTER can be used here, - * seqcount provides the necessary barriers + * We are not changing the effective set of fences here so can + * merely update the pointer to the new array; both existing + * readers and new readers will see exactly the same set of + * active (unsignaled) shared fences. Individual fences and the + * old array are protected by RCU and so will not vanish under + * the gaze of the rcu_read_lock() readers. */ - RCU_INIT_POINTER(obj->fence, new); - write_seqcount_end(&obj->seq); - preempt_enable(); + rcu_assign_pointer(obj->fence, new); if (!old) return 0; /* Drop the references to the signaled fences */ - for (i = k; i < new->shared_max; ++i) { + for (i = k; i < max; ++i) { struct dma_fence *fence; fence = rcu_dereference_protected(new->shared[i], @@ -149,6 +227,7 @@ struct dma_fence *fence) { struct reservation_object_list *fobj; + struct dma_fence *old; unsigned int i, count; dma_fence_get(fence); @@ -162,18 +241,16 @@ write_seqcount_begin(&obj->seq); for (i = 0; i < count; ++i) { - struct dma_fence *old_fence; - old_fence = rcu_dereference_protected(fobj->shared[i], - reservation_object_held(obj)); - if (old_fence->context == fence->context || - dma_fence_is_signaled(old_fence)) { - dma_fence_put(old_fence); + old = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(obj)); + if (old->context == fence->context || + dma_fence_is_signaled(old)) goto replace; - } } BUG_ON(fobj->shared_count >= fobj->shared_max); + old = NULL; count++; replace: @@ -183,6 +260,7 @@ write_seqcount_end(&obj->seq); preempt_enable(); + dma_fence_put(old); } EXPORT_SYMBOL(reservation_object_add_shared_fence); @@ -239,7 +317,6 @@ { struct reservation_object_list *src_list, *dst_list; struct dma_fence *old, *new; - size_t size; unsigned i; reservation_object_assert_held(dst); @@ -251,10 +328,9 @@ if (src_list) { unsigned shared_count = src_list->shared_count; - size = offsetof(typeof(*src_list), shared[shared_count]); rcu_read_unlock(); - dst_list = kmalloc(size, GFP_KERNEL); + dst_list = reservation_object_list_alloc(shared_count); if (!dst_list) return -ENOMEM; @@ -266,7 +342,6 @@ } dst_list->shared_count = 0; - dst_list->shared_max = shared_count; for (i = 0; i < src_list->shared_count; ++i) { struct dma_fence *fence; @@ -276,7 +351,7 @@ continue; if (!dma_fence_get_rcu(fence)) { - kfree(dst_list); + reservation_object_list_free(dst_list); src_list = rcu_dereference(src->fence); goto retry; } @@ -306,8 +381,7 @@ write_seqcount_end(&dst->seq); preempt_enable(); - if (src_list) - kfree_rcu(src_list, rcu); + reservation_object_list_free(src_list); dma_fence_put(old); return 0; @@ -385,13 +459,6 @@ if (!dma_fence_get_rcu(shared[i])) break; } - - if (!pfence_excl && fence_excl) { - shared[i] = fence_excl; - fence_excl = NULL; - ++i; - 
++shared_count;
-		}
 	}
 
 	if (i != shared_count || read_seqcount_retry(&obj->seq, seq)) {
@@ -406,6 +473,11 @@
 		rcu_read_unlock();
 	} while (ret);
 
+	if (pfence_excl)
+		*pfence_excl = fence_excl;
+	else if (fence_excl)
+		shared[shared_count++] = fence_excl;
+
 	if (!shared_count) {
 		kfree(shared);
 		shared = NULL;
@@ -413,9 +485,6 @@
 	*pshared_count = shared_count;
 	*pshared = shared;
 
-	if (pfence_excl)
-		*pfence_excl = fence_excl;
-
 	return ret;
 }
 EXPORT_SYMBOL_GPL(reservation_object_get_fences_rcu);
diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 1969-12-31 18:00:00.000000000 -0600
+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 2019-08-31 15:01:11.839736167 -0500
@@ -0,0 +1,323 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */ + +#undef pr_fmt +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include +#include +#include +#include +#include +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "sdma0/sdma0_4_2_2_offset.h" +#include "sdma0/sdma0_4_2_2_sh_mask.h" +#include "sdma1/sdma1_4_2_2_offset.h" +#include "sdma1/sdma1_4_2_2_sh_mask.h" +#include "sdma2/sdma2_4_2_2_offset.h" +#include "sdma2/sdma2_4_2_2_sh_mask.h" +#include "sdma3/sdma3_4_2_2_offset.h" +#include "sdma3/sdma3_4_2_2_sh_mask.h" +#include "sdma4/sdma4_4_2_2_offset.h" +#include "sdma4/sdma4_4_2_2_sh_mask.h" +#include "sdma5/sdma5_4_2_2_offset.h" +#include "sdma5/sdma5_4_2_2_sh_mask.h" +#include "sdma6/sdma6_4_2_2_offset.h" +#include "sdma6/sdma6_4_2_2_sh_mask.h" +#include "sdma7/sdma7_4_2_2_offset.h" +#include "sdma7/sdma7_4_2_2_sh_mask.h" +#include "v9_structs.h" +#include "soc15.h" +#include "soc15d.h" +#include "amdgpu_amdkfd_gfx_v9.h" + +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v9_sdma_mqd *)mqd; +} + +static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t base[8] = { + SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA2, 0, + mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA3, 0, + mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA4, 0, + mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA5, 0, + mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA6, 0, + mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA7, 0, + mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL + }; + uint32_t retval; + + retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - + mmSDMA0_RLC0_RB_CNTL); + + pr_debug("sdma base address: 0x%x\n", retval); + + return retval; +} + +static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, + u32 instance, u32 offset) +{ + switch (instance) { + case 0: + return (adev->reg_offset[SDMA0_HWIP][0][0] + offset); + case 1: + return (adev->reg_offset[SDMA1_HWIP][0][1] + offset); + case 2: + return (adev->reg_offset[SDMA2_HWIP][0][1] + offset); + case 3: + return (adev->reg_offset[SDMA3_HWIP][0][1] + offset); + case 4: + return (adev->reg_offset[SDMA4_HWIP][0][1] + offset); + case 5: + return (adev->reg_offset[SDMA5_HWIP][0][1] + offset); + case 6: + return (adev->reg_offset[SDMA6_HWIP][0][1] + offset); + case 7: + return (adev->reg_offset[SDMA7_HWIP][0][1] + offset); + default: + break; + } + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + unsigned long end_jiffies; + uint32_t data; + uint64_t data64; + uint64_t __user *wptr64 = (uint64_t __user *)wptr; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + sdmax_gfx_context_cntl = sdma_v4_0_get_reg_offset(adev, + m->sdma_engine_id, mmSDMA0_GFX_CONTEXT_CNTL); + + WREG32(sdma_base_addr + 
mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + data = RREG32(sdmax_gfx_context_cntl); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(sdmax_gfx_context_cntl, data); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + m->sdmax_rlcx_doorbell_offset); + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + lower_32_bits(data64)); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + upper_32_bits(data64)); + } else { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + } + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+10) + + *dump = kmalloc_array(HQD_N_REGS * 2, sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; + reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; + reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) + DUMP_REG(sdma_base_addr + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr; + 
uint32_t temp; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr_hi = + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + + return 0; +} + +static const struct kfd2kgd_calls kfd2kgd = { + .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, + .init_interrupts = kgd_gfx_v9_init_interrupts, + .hqd_load = kgd_gfx_v9_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_gfx_v9_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_gfx_v9_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_gfx_v9_address_watch_disable, + .address_watch_execute = kgd_gfx_v9_address_watch_execute, + .wave_control_execute = kgd_gfx_v9_wave_control_execute, + .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, + .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va, + .get_tile_config = kgd_gfx_v9_get_tile_config, + .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, + .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, + .get_hive_id = amdgpu_amdkfd_get_hive_id, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void) +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 2019-08-31 15:01:11.839736167 -0500 @@ -87,7 +87,12 @@ case CHIP_RAVEN: kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); break; + case CHIP_ARCTURUS: + kfd2kgd = amdgpu_amdkfd_arcturus_get_functions(); + break; case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions(); break; default: @@ -651,8 +656,12 @@ { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (adev->powerplay.pp_funcs && - adev->powerplay.pp_funcs->switch_power_profile) + if (is_support_sw_smu(adev)) + smu_switch_power_profile(&adev->smu, + PP_SMC_POWER_PROFILE_COMPUTE, + !idle); + else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->switch_power_profile) amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, !idle); @@ -714,6 +723,11 @@ { return NULL; } + +struct kfd2kgd_calls 
*amdgpu_amdkfd_arcturus_get_functions(void) +{ + return NULL; +} struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void) { diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 2019-08-31 15:01:11.839736167 -0500 @@ -27,7 +27,6 @@ #include #include #include -#include #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "amdgpu_ucode.h" @@ -802,42 +801,6 @@ return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; } -static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) -{ - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - uint32_t req = (1 << vmid) | - (0 << GCVM_INVALIDATE_ENG0_REQ__FLUSH_TYPE__SHIFT) |/* legacy */ - GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PTES_MASK | - GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE0_MASK | - GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE1_MASK | - GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE2_MASK | - GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L1_PTES_MASK; - - mutex_lock(&adev->srbm_mutex); - - /* Use light weight invalidation. - * - * TODO 1: agree on the right set of invalidation registers for - * KFD use. Use the last one for now. Invalidate only GCHUB as - * SDMA is now moved to GCHUB - * - * TODO 2: support range-based invalidation, requires kfg2kgd - * interface change - */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32), - 0xffffffff); - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32), - 0x0000001f); - - WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ), req); - - while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK)) & - (1 << vmid))) - cpu_relax(); - - mutex_unlock(&adev->srbm_mutex); -} - static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) { signed long r; @@ -878,7 +841,8 @@ if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) == pasid) { - write_vmid_invalidate_request(kgd, vmid); + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + AMDGPU_GFXHUB_0, 0); break; } } @@ -896,7 +860,7 @@ return 0; } - write_vmid_invalidate_request(kgd, vmid); + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); return 0; } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 2019-08-31 15:01:11.839736167 -0500 @@ -47,6 +47,7 @@ #include "soc15d.h" #include "mmhub_v1_0.h" #include "gfxhub_v1_0.h" +#include "gmc_v9_0.h" #define V9_PIPE_PER_MEC (4) @@ -58,66 +59,11 @@ RESET_WAVES }; -/* - * Register access functions - */ - -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, - uint32_t sh_mem_config, - uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, - uint32_t sh_mem_bases); -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, - unsigned int vmid); -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm); 
-static int kgd_hqd_dump(struct kgd_dev *kgd, - uint32_t pipe_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, - uint32_t __user *wptr, struct mm_struct *mm); -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, - uint32_t engine_id, uint32_t queue_id, - uint32_t (**dump)[2], uint32_t *n_regs); -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id); -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, - enum kfd_preempt_type reset_type, - unsigned int utimeout, uint32_t pipe_id, - uint32_t queue_id); -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, - unsigned int utimeout); -static int kgd_address_watch_disable(struct kgd_dev *kgd); -static int kgd_address_watch_execute(struct kgd_dev *kgd, - unsigned int watch_point_id, - uint32_t cntl_val, - uint32_t addr_hi, - uint32_t addr_lo); -static int kgd_wave_control_execute(struct kgd_dev *kgd, - uint32_t gfx_index_val, - uint32_t sq_cmd); -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, - unsigned int watch_point_id, - unsigned int reg_offset); - -static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, - uint8_t vmid); -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, - uint8_t vmid); -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); -static void set_scratch_backing_va(struct kgd_dev *kgd, - uint64_t va, uint32_t vmid); -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); /* Because of REG_GET_FIELD() being used, we put this function in the * asic specific file. 
*/ -static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, +int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, struct tile_config *config) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -135,39 +81,6 @@ return 0; } -static const struct kfd2kgd_calls kfd2kgd = { - .program_sh_mem_settings = kgd_program_sh_mem_settings, - .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_interrupts = kgd_init_interrupts, - .hqd_load = kgd_hqd_load, - .hqd_sdma_load = kgd_hqd_sdma_load, - .hqd_dump = kgd_hqd_dump, - .hqd_sdma_dump = kgd_hqd_sdma_dump, - .hqd_is_occupied = kgd_hqd_is_occupied, - .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, - .hqd_destroy = kgd_hqd_destroy, - .hqd_sdma_destroy = kgd_hqd_sdma_destroy, - .address_watch_disable = kgd_address_watch_disable, - .address_watch_execute = kgd_address_watch_execute, - .wave_control_execute = kgd_wave_control_execute, - .address_watch_get_offset = kgd_address_watch_get_offset, - .get_atc_vmid_pasid_mapping_pasid = - get_atc_vmid_pasid_mapping_pasid, - .get_atc_vmid_pasid_mapping_valid = - get_atc_vmid_pasid_mapping_valid, - .set_scratch_backing_va = set_scratch_backing_va, - .get_tile_config = amdgpu_amdkfd_get_tile_config, - .set_vm_context_page_table_base = set_vm_context_page_table_base, - .invalidate_tlbs = invalidate_tlbs, - .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .get_hive_id = amdgpu_amdkfd_get_hive_id, -}; - -struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) -{ - return (struct kfd2kgd_calls *)&kfd2kgd; -} - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -215,7 +128,7 @@ unlock_srbm(kgd); } -static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, @@ -232,7 +145,7 @@ unlock_srbm(kgd); } -static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, +int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -293,7 +206,7 @@ * but still works */ -static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; @@ -343,7 +256,7 @@ return (struct v9_sdma_mqd *)mqd; } -static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, +int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, uint32_t wptr_shift, uint32_t wptr_mask, struct mm_struct *mm) @@ -438,7 +351,7 @@ return 0; } -static int kgd_hqd_dump(struct kgd_dev *kgd, +int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { @@ -575,7 +488,7 @@ return 0; } -static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, +bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -616,7 +529,7 @@ return false; } -static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, +int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) @@ -704,7 +617,7 @@ return 0; } -static bool 
get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid) { uint32_t reg; @@ -715,7 +628,7 @@ return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; } -static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, +uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid) { uint32_t reg; @@ -754,10 +667,10 @@ return 0; } -static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; + int vmid, i; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; uint32_t flush_type = 0; @@ -773,11 +686,12 @@ for (vmid = 0; vmid < 16; vmid++) { if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) continue; - if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { - if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) + if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { + if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid) == pasid) { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - flush_type); + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + i, flush_type); break; } } @@ -786,9 +700,10 @@ return 0; } -static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int i; if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("non kfd vmid %d\n", vmid); @@ -810,16 +725,18 @@ * TODO 2: support range-based invalidation, requires kfg2kgd * interface change */ - amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); + return 0; } -static int kgd_address_watch_disable(struct kgd_dev *kgd) +int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) { return 0; } -static int kgd_address_watch_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, @@ -828,7 +745,7 @@ return 0; } -static int kgd_wave_control_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, uint32_t gfx_index_val, uint32_t sq_cmd) { @@ -853,14 +770,14 @@ return 0; } -static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, +uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, unsigned int watch_point_id, unsigned int reg_offset) { return 0; } -static void set_scratch_backing_va(struct kgd_dev *kgd, +void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid) { /* No longer needed on GFXv9. The scratch base address is @@ -869,7 +786,7 @@ */ } -static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -884,7 +801,45 @@ * now, all processes share the same address space size, like * on GFX8 and older. 
*/ - mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); + if (adev->asic_type == CHIP_ARCTURUS) { + /* Two MMHUBs */ + mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base); + mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base); + } else + mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); } + +static const struct kfd2kgd_calls kfd2kgd = { + .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, + .init_interrupts = kgd_gfx_v9_init_interrupts, + .hqd_load = kgd_gfx_v9_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_gfx_v9_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_gfx_v9_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_gfx_v9_address_watch_disable, + .address_watch_execute = kgd_gfx_v9_address_watch_execute, + .wave_control_execute = kgd_gfx_v9_wave_control_execute, + .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid, + .set_scratch_backing_va = kgd_gfx_v9_set_scratch_backing_va, + .get_tile_config = kgd_gfx_v9_get_tile_config, + .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, + .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, + .get_hive_id = amdgpu_amdkfd_get_hive_id, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 2019-08-31 15:01:11.839736167 -0500 @@ -0,0 +1,69 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + + + +void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); +int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd); +int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); +uint16_t kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base); +void kgd_gfx_v9_set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); +int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); +int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 2019-08-31 15:01:11.840736167 -0500 @@ -218,7 +218,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, struct amdgpu_amdkfd_fence *ef) { - struct reservation_object *resv = bo->tbo.resv; + struct reservation_object *resv = bo->tbo.base.resv; struct reservation_object_list *old, *new; unsigned int i, j, k; @@ -812,7 +812,7 @@ struct amdgpu_bo *pd = peer_vm->root.base.bo; ret = amdgpu_sync_resv(NULL, - sync, pd->tbo.resv, + sync, pd->tbo.base.resv, AMDGPU_FENCE_OWNER_KFD, false); if (ret) return ret; @@ -887,7 +887,7 @@ AMDGPU_FENCE_OWNER_KFD, false); if (ret) goto wait_pd_fail; - ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); + ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; amdgpu_bo_fence(vm->root.base.bo, @@ -1090,7 +1090,7 @@ */ if (flags & ALLOC_MEM_FLAGS_VRAM) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; - alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; + alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? 
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; @@ -2133,7 +2133,7 @@ * Add process eviction fence to bo so they can * evict each other. */ - ret = reservation_object_reserve_shared(gws_bo->tbo.resv, 1); + ret = reservation_object_reserve_shared(gws_bo->tbo.base.resv, 1); if (ret) goto reserve_shared_fail; amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 2019-08-31 15:01:11.839736167 -0500 @@ -140,6 +140,7 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_arcturus_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c 2019-08-31 15:01:11.840736167 -0500 @@ -574,6 +574,7 @@ { 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX }, + { 0x1002, 0x699f, 0x1028, 0x0814, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX }, { 0, 0, 0, 0, 0 }, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c 2019-08-31 15:01:11.840736167 -0500 @@ -1505,6 +1505,7 @@ struct amdgpu_connector_atom_dig *amdgpu_dig_connector; struct drm_encoder *encoder; struct amdgpu_encoder *amdgpu_encoder; + struct i2c_adapter *ddc = NULL; uint32_t subpixel_order = SubPixelNone; bool shared_ddc = false; bool is_dp_bridge = false; @@ -1574,17 +1575,21 @@ amdgpu_connector->con_priv = amdgpu_dig_connector; if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); - if (amdgpu_connector->ddc_bus) + if (amdgpu_connector->ddc_bus) { has_aux = true; - else + ddc = &amdgpu_connector->ddc_bus->adapter; + } else { DRM_ERROR("DP: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); + } } switch (connector_type) { case DRM_MODE_CONNECTOR_VGA: case DRM_MODE_CONNECTOR_DVIA: default: - drm_connector_init(dev, &amdgpu_connector->base, - &amdgpu_connector_dp_funcs, connector_type); + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dp_funcs, + connector_type, + ddc); drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); connector->interlace_allowed = true; @@ -1602,8 +1607,10 @@ case DRM_MODE_CONNECTOR_HDMIA: case DRM_MODE_CONNECTOR_HDMIB: case DRM_MODE_CONNECTOR_DisplayPort: - drm_connector_init(dev, &amdgpu_connector->base, - &amdgpu_connector_dp_funcs, connector_type); + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dp_funcs, + connector_type, + ddc); drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, @@ -1644,8 +1651,10 @@ break; case DRM_MODE_CONNECTOR_LVDS: case DRM_MODE_CONNECTOR_eDP: - drm_connector_init(dev, &amdgpu_connector->base, - &amdgpu_connector_edp_funcs, connector_type); + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_edp_funcs, + connector_type, + ddc); drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, @@ -1659,13 +1668,18 @@ } else { switch (connector_type) { case DRM_MODE_CONNECTOR_VGA: - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_vga_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_vga_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); amdgpu_connector->dac_load_detect = true; drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.load_detect_property, @@ -1679,13 +1693,18 @@ connector->doublescan_allowed = true; break; case DRM_MODE_CONNECTOR_DVIA: - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_vga_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("DVIA: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_vga_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_vga_helper_funcs); amdgpu_connector->dac_load_detect = true; drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.load_detect_property, @@ -1704,13 +1723,18 @@ if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dvi_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dvi_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); subpixel_order = SubPixelHorizontalRGB; drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.coherent_mode_property, @@ -1754,13 +1778,18 @@ if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dvi_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dvi_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dvi_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.coherent_mode_property, 1); @@ -1796,15 +1825,20 @@ if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_dp_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); - if (amdgpu_connector->ddc_bus) + if (amdgpu_connector->ddc_bus) { has_aux = true; - else + ddc = &amdgpu_connector->ddc_bus->adapter; + } else { DRM_ERROR("DP: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); + } } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_dp_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); subpixel_order = SubPixelHorizontalRGB; drm_object_attach_property(&amdgpu_connector->base.base, adev->mode_info.coherent_mode_property, @@ -1838,15 +1872,20 @@ if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_edp_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); - if (amdgpu_connector->ddc_bus) + if (amdgpu_connector->ddc_bus) { has_aux = true; - else + ddc = &amdgpu_connector->ddc_bus->adapter; + } else { DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n"); + } } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_edp_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_dp_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_FULLSCREEN); @@ -1859,13 +1898,18 @@ if (!amdgpu_dig_connector) goto failed; amdgpu_connector->con_priv = amdgpu_dig_connector; - drm_connector_init(dev, &amdgpu_connector->base, &amdgpu_connector_lvds_funcs, connector_type); - drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs); if (i2c_bus->valid) { amdgpu_connector->ddc_bus = amdgpu_i2c_lookup(adev, i2c_bus); if (!amdgpu_connector->ddc_bus) DRM_ERROR("LVDS: Failed to assign ddc bus! 
Check dmesg for i2c errors.\n"); + else + ddc = &amdgpu_connector->ddc_bus->adapter; } + drm_connector_init_with_ddc(dev, &amdgpu_connector->base, + &amdgpu_connector_lvds_funcs, + connector_type, + ddc); + drm_connector_helper_add(&amdgpu_connector->base, &amdgpu_connector_lvds_helper_funcs); drm_object_attach_property(&amdgpu_connector->base.base, dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_FULLSCREEN); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 2019-08-31 15:01:12.278736205 -0500 @@ -402,7 +402,7 @@ struct ttm_operation_ctx ctx = { .interruptible = true, .no_wait_gpu = false, - .resv = bo->tbo.resv, + .resv = bo->tbo.base.resv, .flags = 0 }; uint32_t domain; @@ -730,7 +730,7 @@ list_for_each_entry(e, &p->validated, tv.head) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - struct reservation_object *resv = bo->tbo.resv; + struct reservation_object *resv = bo->tbo.base.resv; r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp, amdgpu_bo_explicit_sync(bo)); @@ -1732,7 +1732,7 @@ *map = mapping; /* Double check that the BO is reserved by this CS */ - if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) + if (reservation_object_locking_ctx((*bo)->tbo.base.resv) != &parser->ticket) return -EINVAL; if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 2019-08-31 15:01:11.840736167 -0500 @@ -42,7 +42,7 @@ [AMDGPU_HW_IP_VCN_JPEG] = 1, }; -static int amdgput_ctx_total_num_entities(void) +static int amdgpu_ctx_total_num_entities(void) { unsigned i, num_entities = 0; @@ -73,8 +73,8 @@ struct drm_file *filp, struct amdgpu_ctx *ctx) { - unsigned num_entities = amdgput_ctx_total_num_entities(); - unsigned i, j; + unsigned num_entities = amdgpu_ctx_total_num_entities(); + unsigned i, j, k; int r; if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX) @@ -123,7 +123,7 @@ for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS]; - unsigned num_rings; + unsigned num_rings = 0; unsigned num_rqs = 0; switch (i) { @@ -154,16 +154,26 @@ num_rings = 1; break; case AMDGPU_HW_IP_VCN_DEC: - rings[0] = &adev->vcn.ring_dec; - num_rings = 1; + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + rings[num_rings++] = &adev->vcn.inst[j].ring_dec; + } break; case AMDGPU_HW_IP_VCN_ENC: - rings[0] = &adev->vcn.ring_enc[0]; - num_rings = 1; + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + for (k = 0; k < adev->vcn.num_enc_rings; ++k) + rings[num_rings++] = &adev->vcn.inst[j].ring_enc[k]; + } break; case AMDGPU_HW_IP_VCN_JPEG: - rings[0] = &adev->vcn.ring_jpeg; - num_rings = 1; + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + rings[num_rings++] = &adev->vcn.inst[j].ring_jpeg; + } break; } @@ -197,7 +207,7 @@ static void amdgpu_ctx_fini(struct kref *ref) { struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); - 
unsigned num_entities = amdgput_ctx_total_num_entities(); + unsigned num_entities = amdgpu_ctx_total_num_entities(); struct amdgpu_device *adev = ctx->adev; unsigned i, j; @@ -279,10 +289,7 @@ ctx = container_of(ref, struct amdgpu_ctx, refcount); - num_entities = 0; - for (i = 0; i < AMDGPU_HW_IP_NUM; i++) - num_entities += amdgpu_ctx_num_entities[i]; - + num_entities = amdgpu_ctx_total_num_entities(); for (i = 0; i < num_entities; i++) drm_sched_entity_destroy(&ctx->entities[0][i].entity); @@ -344,7 +351,7 @@ { struct amdgpu_ctx *ctx; struct amdgpu_ctx_mgr *mgr; - uint32_t ras_counter; + unsigned long ras_counter; if (!fpriv) return -EINVAL; @@ -514,7 +521,7 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, enum drm_sched_priority priority) { - unsigned num_entities = amdgput_ctx_total_num_entities(); + unsigned num_entities = amdgpu_ctx_total_num_entities(); enum drm_sched_priority ctx_prio; unsigned i; @@ -534,21 +541,24 @@ struct drm_sched_entity *entity) { struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); - unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1); - struct dma_fence *other = centity->fences[idx]; + struct dma_fence *other; + unsigned idx; + long r; - if (other) { - signed long r; - r = dma_fence_wait(other, true); - if (r < 0) { - if (r != -ERESTARTSYS) - DRM_ERROR("Error (%ld) waiting for fence!\n", r); + spin_lock(&ctx->ring_lock); + idx = centity->sequence & (amdgpu_sched_jobs - 1); + other = dma_fence_get(centity->fences[idx]); + spin_unlock(&ctx->ring_lock); - return r; - } - } + if (!other) + return 0; - return 0; + r = dma_fence_wait(other, true); + if (r < 0 && r != -ERESTARTSYS) + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + + dma_fence_put(other); + return r; } void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) @@ -559,7 +569,7 @@ long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) { - unsigned num_entities = amdgput_ctx_total_num_entities(); + unsigned num_entities = amdgpu_ctx_total_num_entities(); struct amdgpu_ctx *ctx; struct idr *idp; uint32_t id, i; @@ -581,7 +591,7 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) { - unsigned num_entities = amdgput_ctx_total_num_entities(); + unsigned num_entities = amdgpu_ctx_total_num_entities(); struct amdgpu_ctx *ctx; struct idr *idp; uint32_t id, i; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 2019-08-31 15:01:11.840736167 -0500 @@ -49,8 +49,8 @@ enum drm_sched_priority override_priority; struct mutex lock; atomic_t guilty; - uint32_t ras_counter_ce; - uint32_t ras_counter_ue; + unsigned long ras_counter_ce; + unsigned long ras_counter_ue; }; struct amdgpu_ctx_mgr { diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 2019-08-31 15:01:11.841736167 -0500 @@ -70,7 +70,11 @@ MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/renoir_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin"); 
+MODULE_FIRMWARE("amdgpu/navi14_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 @@ -98,7 +102,11 @@ "VEGA12", "VEGA20", "RAVEN", + "ARCTURUS", + "RENOIR", "NAVI10", + "NAVI14", + "NAVI12", "LAST", }; @@ -413,6 +421,40 @@ } /** + * amdgpu_invalid_rreg64 - dummy 64 bit reg read function + * + * @adev: amdgpu device pointer + * @reg: offset of register + * + * Dummy register read function. Used for register blocks + * that certain asics don't have (all asics). + * Returns the value in the register. + */ +static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg) +{ + DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg); + BUG(); + return 0; +} + +/** + * amdgpu_invalid_wreg64 - dummy reg write function + * + * @adev: amdgpu device pointer + * @reg: offset of register + * @v: value to write to the register + * + * Dummy register read function. Used for register blocks + * that certain asics don't have (all asics). + */ +static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v) +{ + DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n", + reg, v); + BUG(); +} + +/** * amdgpu_block_invalid_rreg - dummy reg read function * * @adev: amdgpu device pointer @@ -1384,9 +1426,21 @@ else chip_name = "raven"; break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; + case CHIP_RENOIR: + chip_name = "renoir"; + break; case CHIP_NAVI10: chip_name = "navi10"; break; + case CHIP_NAVI14: + chip_name = "navi14"; + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); @@ -1529,7 +1583,10 @@ case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: - if (adev->asic_type == CHIP_RAVEN) + case CHIP_ARCTURUS: + case CHIP_RENOIR: + if (adev->asic_type == CHIP_RAVEN || + adev->asic_type == CHIP_RENOIR) adev->family = AMDGPU_FAMILY_RV; else adev->family = AMDGPU_FAMILY_AI; @@ -1539,6 +1596,8 @@ return r; break; case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: adev->family = AMDGPU_FAMILY_NV; r = nv_set_ip_blocks(adev); @@ -1560,9 +1619,6 @@ r = amdgpu_virt_request_full_gpu(adev, true); if (r) return -EAGAIN; - - /* query the reg access mode at the very beginning */ - amdgpu_virt_init_reg_access_mode(adev); } adev->pm.pp_feature = amdgpu_pp_feature_mask; @@ -1665,28 +1721,34 @@ if (adev->asic_type >= CHIP_VEGA10) { for (i = 0; i < adev->num_ip_blocks; i++) { - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { - if (adev->in_gpu_reset || adev->in_suspend) { - if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) - break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */ - r = adev->ip_blocks[i].version->funcs->resume(adev); - if (r) { - DRM_ERROR("resume of IP block <%s> failed %d\n", + if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) + continue; + + /* no need to do the fw loading again if already done*/ + if (adev->ip_blocks[i].status.hw == true) + break; + + if (adev->in_gpu_reset || adev->in_suspend) { + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); - return r; - } - } else { - r = adev->ip_blocks[i].version->funcs->hw_init(adev); - if (r) { - DRM_ERROR("hw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; - } + return r; + } + } else { + r = 
adev->ip_blocks[i].version->funcs->hw_init(adev); + if (r) { + DRM_ERROR("hw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; } - adev->ip_blocks[i].status.hw = true; } + + adev->ip_blocks[i].status.hw = true; + break; } } + r = amdgpu_pm_load_smu_firmware(adev, &smu_version); return r; @@ -2128,7 +2190,9 @@ if (r) { DRM_ERROR("suspend of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); + return r; } + adev->ip_blocks[i].status.hw = false; } } @@ -2163,6 +2227,25 @@ DRM_ERROR("suspend of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); } + adev->ip_blocks[i].status.hw = false; + /* handle putting the SMC in the appropriate state */ + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { + if (is_support_sw_smu(adev)) { + /* todo */ + } else if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_mp1_state) { + r = adev->powerplay.pp_funcs->set_mp1_state( + adev->powerplay.pp_handle, + adev->mp1_state); + if (r) { + DRM_ERROR("SMC failed to set mp1 state %d, %d\n", + adev->mp1_state, r); + return r; + } + } + } + + adev->ip_blocks[i].status.hw = false; } return 0; @@ -2215,6 +2298,7 @@ for (j = 0; j < adev->num_ip_blocks; j++) { block = &adev->ip_blocks[j]; + block->status.hw = false; if (block->version->type != ip_order[i] || !block->status.valid) continue; @@ -2223,6 +2307,7 @@ DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); if (r) return r; + block->status.hw = true; } } @@ -2250,13 +2335,15 @@ block = &adev->ip_blocks[j]; if (block->version->type != ip_order[i] || - !block->status.valid) + !block->status.valid || + block->status.hw) continue; r = block->version->funcs->hw_init(adev); DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded"); if (r) return r; + block->status.hw = true; } } @@ -2280,17 +2367,19 @@ int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) + if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { + r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { DRM_ERROR("resume of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); return r; } + adev->ip_blocks[i].status.hw = true; } } @@ -2315,7 +2404,7 @@ int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) + if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || @@ -2328,6 +2417,7 @@ adev->ip_blocks[i].version->funcs->name, r); return r; } + adev->ip_blocks[i].status.hw = true; } return 0; @@ -2426,6 +2516,11 @@ #endif #if defined(CONFIG_DRM_AMD_DC_DCN2_0) case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: +#endif +#if defined(CONFIG_DRM_AMD_DC_DCN2_1) + case CHIP_RENOIR: #endif return amdgpu_dc != 0; #endif @@ -2509,6 +2604,8 @@ adev->pcie_wreg = &amdgpu_invalid_wreg; adev->pciep_rreg = &amdgpu_invalid_rreg; adev->pciep_wreg = &amdgpu_invalid_wreg; + adev->pcie_rreg64 = &amdgpu_invalid_rreg64; + adev->pcie_wreg64 = &amdgpu_invalid_wreg64; adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; adev->didt_rreg = &amdgpu_invalid_rreg; 
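The hunk above extends an existing amdgpu pattern: at device init, every register-access callback slot, including the new 64-bit pcie_rreg64/pcie_wreg64 slots, is first pointed at a loud "invalid" handler, so an ASIC that never installs a real accessor traps on first use instead of silently reading garbage. A minimal standalone userspace C sketch of that default-callback idea follows; the struct and names here are illustrative assumptions for the sketch, not the driver's real types, and only the slot names mirror the fields visible in the hunk.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct fake_dev {
	uint64_t (*pcie_rreg64)(struct fake_dev *dev, uint32_t reg);
	void (*pcie_wreg64)(struct fake_dev *dev, uint32_t reg, uint64_t v);
};

/* stands in for amdgpu_invalid_rreg64(): log the offset, then die loudly */
static uint64_t invalid_rreg64(struct fake_dev *dev, uint32_t reg)
{
	(void)dev;
	fprintf(stderr, "invalid 64 bit read of register 0x%04X\n",
		(unsigned)reg);
	abort();	/* plays the role of BUG(); never returns */
}

static void invalid_wreg64(struct fake_dev *dev, uint32_t reg, uint64_t v)
{
	(void)dev;
	fprintf(stderr, "invalid 64 bit write of 0x%016llX to register 0x%04X\n",
		(unsigned long long)v, (unsigned)reg);
	abort();	/* plays the role of BUG() */
}

int main(void)
{
	/* defaults are installed unconditionally, before any ASIC-specific
	 * setup, mirroring the assignments in the device-init hunk above */
	struct fake_dev dev = {
		.pcie_rreg64 = invalid_rreg64,
		.pcie_wreg64 = invalid_wreg64,
	};

	/* an ASIC with real 64-bit MMIO support would overwrite the slots
	 * here; leaving them untouched makes the next call fail fast */
	dev.pcie_wreg64(&dev, 0x10, 0xdeadbeefULL);
	return 0;
}

The point of the dummy over a NULL slot is diagnosability: a NULL function pointer would also crash, but the dummy turns the same programmer error into an actionable log line naming the register offset before the machine stops.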
@@ -3389,7 +3486,7 @@ amdgpu_virt_init_data_exchange(adev); amdgpu_virt_release_full_gpu(adev, true); if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { - atomic_inc(&adev->vram_lost_counter); + amdgpu_inc_vram_lost(adev); r = amdgpu_device_recover_vram(adev); } @@ -3431,6 +3528,7 @@ case CHIP_VEGA20: case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_RAVEN: break; default: goto disabled; @@ -3554,7 +3652,7 @@ vram_lost = amdgpu_device_check_vram_lost(tmp_adev); if (vram_lost) { DRM_INFO("VRAM is lost due to GPU reset!\n"); - atomic_inc(&tmp_adev->vram_lost_counter); + amdgpu_inc_vram_lost(tmp_adev); } r = amdgpu_gtt_mgr_recover( @@ -3627,6 +3725,17 @@ atomic_inc(&adev->gpu_reset_counter); adev->in_gpu_reset = 1; + switch (amdgpu_asic_reset_method(adev)) { + case AMD_RESET_METHOD_MODE1: + adev->mp1_state = PP_MP1_STATE_SHUTDOWN; + break; + case AMD_RESET_METHOD_MODE2: + adev->mp1_state = PP_MP1_STATE_RESET; + break; + default: + adev->mp1_state = PP_MP1_STATE_NONE; + break; + } /* Block kfd: SRIOV would do it separately */ if (!amdgpu_sriov_vf(adev)) amdgpu_amdkfd_pre_reset(adev); @@ -3640,6 +3749,7 @@ if (!amdgpu_sriov_vf(adev)) amdgpu_amdkfd_post_reset(adev); amdgpu_vf_error_trans_all(adev); + adev->mp1_state = PP_MP1_STATE_NONE; adev->in_gpu_reset = 0; mutex_unlock(&adev->lock_reset); } @@ -3684,14 +3794,14 @@ if (hive && !mutex_trylock(&hive->reset_lock)) { DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress", - job->base.id, hive->hive_id); + job ? job->base.id : -1, hive->hive_id); return 0; } /* Start with adev pre asic reset first for soft reset check.*/ if (!amdgpu_device_lock_adev(adev, !hive)) { DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress", - job->base.id); + job ? job->base.id : -1); return 0; } @@ -3732,7 +3842,7 @@ if (!ring || !ring->sched.thread) continue; - drm_sched_stop(&ring->sched, &job->base); + drm_sched_stop(&ring->sched, job ? 
&job->base : NULL); } } @@ -3757,9 +3867,7 @@ /* Guilty job will be freed after this*/ - r = amdgpu_device_pre_asic_reset(adev, - job, - &need_full_reset); + r = amdgpu_device_pre_asic_reset(adev, job, &need_full_reset); if (r) { /*TODO Should we stop ?*/ DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c 2019-08-31 15:01:11.841736167 -0500 @@ -191,7 +191,8 @@ } if (!adev->enable_virtual_display) { - r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev)); + r = amdgpu_bo_pin(new_abo, + amdgpu_display_supported_domains(adev, new_abo->flags)); if (unlikely(r != 0)) { DRM_ERROR("failed to pin new abo buffer before flip\n"); goto unreserve; @@ -204,7 +205,7 @@ goto unpin; } - r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl, + r = reservation_object_get_fences_rcu(new_abo->tbo.base.resv, &work->excl, &work->shared_count, &work->shared); if (unlikely(r != 0)) { @@ -495,13 +496,25 @@ .create_handle = drm_gem_fb_create_handle, }; -uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev) +uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, + uint64_t bo_flags) { uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM; #if defined(CONFIG_DRM_AMD_DC) - if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN && - adev->flags & AMD_IS_APU && + /* + * if amdgpu_bo_support_uswc returns false it means that USWC mappings + * is not supported for this board. But this mapping is required + * to avoid hang caused by placement of scanout BO in GTT on certain + * APUs. So force the BO placement to VRAM in case this architecture + * will not allow USWC mappings. + * Also, don't allow GTT domain if the BO doens't have USWC falg set. + */ + if (adev->asic_type >= CHIP_CARRIZO && + adev->asic_type <= CHIP_RAVEN && + (adev->flags & AMD_IS_APU) && + (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && + amdgpu_bo_support_uswc(bo_flags) && amdgpu_device_asic_has_dc_support(adev->asic_type)) domain |= AMDGPU_GEM_DOMAIN_GTT; #endif diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h 2019-08-31 15:01:11.841736167 -0500 @@ -38,7 +38,8 @@ int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); void amdgpu_display_update_priority(struct amdgpu_device *adev); -uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev); +uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, + uint64_t bo_flags); struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 2019-08-31 15:01:11.841736167 -0500 @@ -216,7 +216,7 @@ * fences on the reservation object into a single exclusive * fence. 
*/ - r = __reservation_object_make_exclusive(bo->tbo.resv); + r = __reservation_object_make_exclusive(bo->tbo.base.resv); if (r) goto error_unreserve; } @@ -268,20 +268,6 @@ } /** - * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation - * @obj: GEM BO - * - * Returns: - * The BO's reservation object. - */ -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) -{ - struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); - - return bo->tbo.resv; -} - -/** * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation * @dma_buf: Shared DMA buffer * @direction: Direction of DMA transfer @@ -299,7 +285,7 @@ struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); struct ttm_operation_ctx ctx = { true, false }; - u32 domain = amdgpu_display_supported_domains(adev); + u32 domain = amdgpu_display_supported_domains(adev, bo->flags); int ret; bool reads = (direction == DMA_BIDIRECTIONAL || direction == DMA_FROM_DEVICE); @@ -339,14 +325,12 @@ * @gobj: GEM BO * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR. * - * The main work is done by the &drm_gem_prime_export helper, which in turn - * uses &amdgpu_gem_prime_res_obj. + * The main work is done by the &drm_gem_prime_export helper. * * Returns: * Shared DMA buffer representing the GEM BO from the given device. */ -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gobj, +struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj, int flags) { struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); @@ -356,9 +340,9 @@ bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) return ERR_PTR(-EPERM); - buf = drm_gem_prime_export(dev, gobj, flags); + buf = drm_gem_prime_export(gobj, flags); if (!IS_ERR(buf)) { - buf->file->f_mapping = dev->anon_inode->i_mapping; + buf->file->f_mapping = gobj->dev->anon_inode->i_mapping; buf->ops = &amdgpu_dmabuf_ops; } @@ -396,7 +380,7 @@ bp.flags = 0; bp.type = ttm_bo_type_sg; bp.resv = resv; - ww_mutex_lock(&resv->lock, NULL); + reservation_object_lock(resv, NULL); ret = amdgpu_bo_create(adev, &bp, &bo); if (ret) goto error; @@ -408,11 +392,11 @@ if (attach->dmabuf->ops != &amdgpu_dmabuf_ops) bo->prime_shared_count = 1; - ww_mutex_unlock(&resv->lock); - return &bo->gem_base; + reservation_object_unlock(resv); + return &bo->tbo.base; error: - ww_mutex_unlock(&resv->lock); + reservation_object_unlock(resv); return ERR_PTR(ret); } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.h 2019-08-31 15:01:11.841736167 -0500 @@ -30,12 +30,10 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gobj, +struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj, int flags); struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf); -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, diff -Naur 
linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 2019-08-31 15:01:11.841736167 -0500 @@ -130,13 +130,18 @@ AMDGPU_VEGA20_DOORBELL_IH = 0x178, /* MMSCH: 392~407 * overlap the doorbell assignment with VCN as they are mutually exclusive - * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD + * VCN engine's doorbell is 32 bit and two VCN rings share one QWORD */ - AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ + AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* VCN0 */ AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189, AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A, AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B, + AMDGPU_VEGA20_DOORBELL64_VCN8_9 = 0x18C, /* VCN1 */ + AMDGPU_VEGA20_DOORBELL64_VCNa_b = 0x18D, + AMDGPU_VEGA20_DOORBELL64_VCNc_d = 0x18E, + AMDGPU_VEGA20_DOORBELL64_VCNe_f = 0x18F, + AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = 0x188, AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = 0x189, AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = 0x18A, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 2019-08-31 15:01:11.841736167 -0500 @@ -79,9 +79,10 @@ * - 3.31.0 - Add support for per-flip tiling attribute changes with DC * - 3.32.0 - Add syncobj timeline support to AMDGPU_CS. * - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS. + * - 3.34.0 - Non-DC can flip correctly between buffers with different pitches */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 33 +#define KMS_DRIVER_MINOR 34 #define KMS_DRIVER_PATCHLEVEL 0 #define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256 @@ -142,7 +143,7 @@ int amdgpu_mcbp = 0; int amdgpu_discovery = -1; int amdgpu_mes = 0; -int amdgpu_noretry; +int amdgpu_noretry = 1; struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), @@ -610,7 +611,7 @@ module_param_named(mes, amdgpu_mes, int, 0444); MODULE_PARM_DESC(noretry, - "Disable retry faults (0 = retry enabled (default), 1 = retry disabled)"); + "Disable retry faults (0 = retry enabled, 1 = retry disabled (default))"); module_param_named(noretry, amdgpu_noretry, int, 0644); #ifdef CONFIG_HSA_AMD @@ -996,6 +997,11 @@ /* Raven */ {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, + /* Arcturus */ + {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, /* Navi10 */ {0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, @@ -1004,6 +1010,11 @@ {0x1002, 0x731A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, + /* Navi14 */ + {0x1002, 0x7340, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI14}, + + /* Renoir */ + {0x1002, 0x1636, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
CHIP_RENOIR|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, {0, 0, 0} }; @@ -1092,21 +1103,21 @@ * unfortunately we can't detect certain * hypervisors so just do this all the time. */ + adev->mp1_state = PP_MP1_STATE_UNLOAD; amdgpu_device_ip_suspend(adev); + adev->mp1_state = PP_MP1_STATE_NONE; } static int amdgpu_pmops_suspend(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_suspend(drm_dev, true, true); } static int amdgpu_pmops_resume(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct drm_device *drm_dev = dev_get_drvdata(dev); /* GPU comes up enabled by the bios on resume */ if (amdgpu_device_is_px(drm_dev)) { @@ -1120,33 +1131,29 @@ static int amdgpu_pmops_freeze(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_suspend(drm_dev, false, true); } static int amdgpu_pmops_thaw(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_resume(drm_dev, false, true); } static int amdgpu_pmops_poweroff(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_suspend(drm_dev, true, true); } static int amdgpu_pmops_restore(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); + struct drm_device *drm_dev = dev_get_drvdata(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); return amdgpu_device_resume(drm_dev, false, true); } @@ -1205,8 +1212,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) { - struct pci_dev *pdev = to_pci_dev(dev); - struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct drm_device *drm_dev = dev_get_drvdata(dev); struct drm_crtc *crtc; if (!amdgpu_device_is_px(drm_dev)) { @@ -1373,7 +1379,7 @@ .driver_features = DRIVER_USE_AGP | DRIVER_ATOMIC | DRIVER_GEM | - DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, + DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, .load = amdgpu_driver_load_kms, .open = amdgpu_driver_open_kms, .postclose = amdgpu_driver_postclose_kms, @@ -1397,7 +1403,6 @@ .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_export = amdgpu_gem_prime_export, .gem_prime_import = amdgpu_gem_prime_import, - .gem_prime_res_obj = amdgpu_gem_prime_res_obj, .gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table, .gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table, .gem_prime_vmap = amdgpu_gem_prime_vmap, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c 2019-08-31 15:01:11.841736167 -0500 @@ -131,6 +131,10 @@ int aligned_size, size; int height = mode_cmd->height; u32 cpp; + u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; info = drm_get_format_info(adev->ddev, mode_cmd); cpp = info->cpp[0]; @@ -138,15 +142,11 @@ /* need to align pitch with crtc limits */ 
mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp, fb_tiled); - domain = amdgpu_display_supported_domains(adev); - + domain = amdgpu_display_supported_domains(adev, flags); height = ALIGN(mode_cmd->height, 8); size = mode_cmd->pitches[0] * height; aligned_size = ALIGN(size, PAGE_SIZE); - ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, + ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags, ttm_bo_type_kernel, NULL, &gobj); if (ret) { pr_err("failed to allocate framebuffer (%d)\n", aligned_size); @@ -168,7 +168,6 @@ dev_err(adev->dev, "FB failed to set tiling flags\n"); } - ret = amdgpu_bo_pin(abo, domain); if (ret) { amdgpu_bo_unreserve(abo); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 2019-08-31 15:01:11.841736167 -0500 @@ -251,7 +251,9 @@ } mb(); amdgpu_asic_flush_hdp(adev, NULL); - amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); + return 0; } @@ -310,9 +312,9 @@ uint64_t flags) { #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS - unsigned i,t,p; + unsigned t,p; #endif - int r; + int r, i; if (!adev->gart.ready) { WARN(1, "trying to bind memory to uninitialized GART !\n"); @@ -336,7 +338,8 @@ mb(); amdgpu_asic_flush_hdp(adev, NULL); - amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); return 0; } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c 2019-08-31 15:01:11.841736167 -0500 @@ -85,7 +85,7 @@ } return r; } - *obj = &bo->gem_base; + *obj = &bo->tbo.base; return 0; } @@ -134,7 +134,7 @@ return -EPERM; if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID && - abo->tbo.resv != vm->root.base.bo->tbo.resv) + abo->tbo.base.resv != vm->root.base.bo->tbo.base.resv) return -EPERM; r = amdgpu_bo_reserve(abo, false); @@ -252,7 +252,7 @@ if (r) return r; - resv = vm->root.base.bo->tbo.resv; + resv = vm->root.base.bo->tbo.base.resv; } r = amdgpu_gem_object_create(adev, size, args->in.alignment, @@ -433,7 +433,7 @@ return -ENOENT; } robj = gem_to_amdgpu_bo(gobj); - ret = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, + ret = reservation_object_wait_timeout_rcu(robj->tbo.base.resv, true, true, timeout); /* ret == 0 means not signaled, @@ -689,7 +689,7 @@ struct drm_amdgpu_gem_create_in info; void __user *out = u64_to_user_ptr(args->value); - info.bo_size = robj->gem_base.size; + info.bo_size = robj->tbo.base.size; info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; info.domains = robj->preferred_domains; info.domain_flags = robj->flags; @@ -747,7 +747,8 @@ struct amdgpu_device *adev = dev->dev_private; struct drm_gem_object *gobj; uint32_t handle; - u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; u32 domain; int r; @@ -764,7 +765,7 @@ args->size = (u64)args->pitch * args->height; args->size = ALIGN(args->size, 
PAGE_SIZE); domain = amdgpu_bo_get_preferred_pin_domain(adev, - amdgpu_display_supported_domains(adev)); + amdgpu_display_supported_domains(adev, flags)); r = amdgpu_gem_object_create(adev, args->size, 0, domain, flags, ttm_bo_type_device, NULL, &gobj); if (r) @@ -819,8 +820,8 @@ if (pin_count) seq_printf(m, " pin count %d", pin_count); - dma_buf = READ_ONCE(bo->gem_base.dma_buf); - attachment = READ_ONCE(bo->gem_base.import_attach); + dma_buf = READ_ONCE(bo->tbo.base.dma_buf); + attachment = READ_ONCE(bo->tbo.base.import_attach); if (attachment) seq_printf(m, " imported from %p", dma_buf); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h 2019-08-31 15:01:11.841736167 -0500 @@ -31,7 +31,7 @@ */ #define AMDGPU_GEM_DOMAIN_MAX 0x3 -#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base) +#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, tbo.base) void amdgpu_gem_object_free(struct drm_gem_object *obj); int amdgpu_gem_object_open(struct drm_gem_object *obj, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 2019-08-31 15:01:11.842736167 -0500 @@ -389,7 +389,7 @@ dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); } - if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) { + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { /* create MQD for each KGQ */ for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; @@ -437,7 +437,7 @@ struct amdgpu_ring *ring = NULL; int i; - if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) { + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) { for (i = 0; i < adev->gfx.num_gfx_rings; i++) { ring = &adev->gfx.gfx_ring[i]; kfree(adev->gfx.me.mqd_backup[i]); @@ -456,7 +456,7 @@ } ring = &adev->gfx.kiq.ring; - if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) + if (adev->asic_type >= CHIP_NAVI10 && amdgpu_async_gfx_ring) kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]); kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); amdgpu_bo_free_kernel(&ring->mqd_obj, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 2019-08-31 15:01:11.842736167 -0500 @@ -196,6 +196,8 @@ uint32_t *dst); void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); + int (*ras_error_inject)(struct amdgpu_device *adev, void *inject_if); + int (*query_ras_error_count) (struct amdgpu_device *adev, void *ras_error_status); }; struct amdgpu_ngg_buf { diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c 2019-08-31 15:01:11.842736167 -0500 @@ -220,6 +220,14 @@ const uint64_t 
sixteen_gb_mask = ~(sixteen_gb - 1); u64 size_af, size_bf; + if (amdgpu_sriov_vf(adev)) { + mc->agp_start = 0xffffffff; + mc->agp_end = 0x0; + mc->agp_size = 0; + + return; + } + if (mc->fb_start > mc->gart_start) { size_bf = (mc->fb_start & sixteen_gb_mask) - ALIGN(mc->gart_end + 1, sixteen_gb); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 2019-08-31 15:01:11.842736167 -0500 @@ -89,8 +89,8 @@ */ struct amdgpu_gmc_funcs { /* flush the vm tlb via mmio */ - void (*flush_gpu_tlb)(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type); + void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -177,10 +177,11 @@ struct amdgpu_xgmi xgmi; struct amdgpu_irq_src ecc_irq; - struct ras_common_if *ras_if; + struct ras_common_if *umc_ras_if; + struct ras_common_if *mmhub_ras_if; }; -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type)) +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu.h 2019-08-31 15:01:11.839736167 -0500 @@ -86,6 +86,8 @@ #include "amdgpu_smu.h" #include "amdgpu_discovery.h" #include "amdgpu_mes.h" +#include "amdgpu_umc.h" +#include "amdgpu_mmhub.h" #define MAX_GPU_INSTANCE 16 @@ -532,6 +534,14 @@ bool grbm_indexed; }; +enum amd_reset_method { + AMD_RESET_METHOD_LEGACY = 0, + AMD_RESET_METHOD_MODE0, + AMD_RESET_METHOD_MODE1, + AMD_RESET_METHOD_MODE2, + AMD_RESET_METHOD_BACO +}; + /* * ASIC specific functions. 
*/ @@ -543,6 +553,7 @@ u32 sh_num, u32 reg_offset, u32 *value); void (*set_vga_state)(struct amdgpu_device *adev, bool state); int (*reset)(struct amdgpu_device *adev); + enum amd_reset_method (*reset_method)(struct amdgpu_device *adev); /* get the reference clock */ u32 (*get_xclk)(struct amdgpu_device *adev); /* MM block clocks */ @@ -627,6 +638,9 @@ typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t); typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t); +typedef uint64_t (*amdgpu_rreg64_t)(struct amdgpu_device*, uint32_t); +typedef void (*amdgpu_wreg64_t)(struct amdgpu_device*, uint32_t, uint64_t); + typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); @@ -648,6 +662,12 @@ u32 ref_and_mask_cp9; u32 ref_and_mask_sdma0; u32 ref_and_mask_sdma1; + u32 ref_and_mask_sdma2; + u32 ref_and_mask_sdma3; + u32 ref_and_mask_sdma4; + u32 ref_and_mask_sdma5; + u32 ref_and_mask_sdma6; + u32 ref_and_mask_sdma7; }; struct amdgpu_mmio_remap { @@ -668,7 +688,7 @@ void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, bool use_doorbell, int doorbell_index, int doorbell_size); void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell, - int doorbell_index); + int doorbell_index, int instance); void (*enable_doorbell_aperture)(struct amdgpu_device *adev, bool enable); void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev, @@ -705,6 +725,9 @@ int is_disable); void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config, uint64_t *count); + uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); + void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, + uint32_t ficadl_val, uint32_t ficadh_val); }; /* Define the HW IP blocks will be used in driver , add more if necessary */ enum amd_hw_ip_block_type { @@ -712,6 +735,12 @@ HDP_HWIP, SDMA0_HWIP, SDMA1_HWIP, + SDMA2_HWIP, + SDMA3_HWIP, + SDMA4_HWIP, + SDMA5_HWIP, + SDMA6_HWIP, + SDMA7_HWIP, MMHUB_HWIP, ATHUB_HWIP, NBIO_HWIP, @@ -728,10 +757,12 @@ NBIF_HWIP, THM_HWIP, CLK_HWIP, + UMC_HWIP, + RSMU_HWIP, MAX_HWIP }; -#define HWIP_MAX_INSTANCE 6 +#define HWIP_MAX_INSTANCE 8 struct amd_powerplay { void *pp_handle; @@ -758,7 +789,6 @@ int usec_timeout; const struct amdgpu_asic_funcs *asic_funcs; bool shutdown; - bool need_dma32; bool need_swiotlb; bool accel_working; struct notifier_block acpi_nb; @@ -803,6 +833,8 @@ amdgpu_wreg_t pcie_wreg; amdgpu_rreg_t pciep_rreg; amdgpu_wreg_t pciep_wreg; + amdgpu_rreg64_t pcie_rreg64; + amdgpu_wreg64_t pcie_wreg64; /* protects concurrent UVD register access */ spinlock_t uvd_ctx_idx_lock; amdgpu_rreg_t uvd_ctx_rreg; @@ -836,6 +868,7 @@ dma_addr_t dummy_page_addr; struct amdgpu_vm_manager vm_manager; struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS]; + unsigned num_vmhubs; /* memory management */ struct amdgpu_mman mman; @@ -915,6 +948,9 @@ /* KFD */ struct amdgpu_kfd_dev kfd; + /* UMC */ + struct amdgpu_umc umc; + /* display related functionality */ struct amdgpu_display_manager dm; @@ -940,6 +976,7 @@ const struct amdgpu_nbio_funcs *nbio_funcs; const struct amdgpu_df_funcs *df_funcs; + const struct amdgpu_mmhub_funcs *mmhub_funcs; /* delayed work_func for deferring clockgating during resume */ struct delayed_work delayed_init_work; @@ -965,6 +1002,7 @@ /* record last mm index being written through WREG32*/ unsigned long last_mm_index; bool in_gpu_reset; + enum pp_mp1_state mp1_state; struct mutex lock_reset; 
struct amdgpu_doorbell_index doorbell_index; @@ -1033,6 +1071,8 @@ #define WREG32_PCIE(reg, v) adev->pcie_wreg(adev, (reg), (v)) #define RREG32_PCIE_PORT(reg) adev->pciep_rreg(adev, (reg)) #define WREG32_PCIE_PORT(reg, v) adev->pciep_wreg(adev, (reg), (v)) +#define RREG64_PCIE(reg) adev->pcie_rreg64(adev, (reg)) +#define WREG64_PCIE(reg, v) adev->pcie_wreg64(adev, (reg), (v)) #define RREG32_SMC(reg) adev->smc_rreg(adev, (reg)) #define WREG32_SMC(reg, v) adev->smc_wreg(adev, (reg), (v)) #define RREG32_UVD_CTX(reg) adev->uvd_ctx_rreg(adev, (reg)) @@ -1093,6 +1133,7 @@ */ #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state)) #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev)) +#define amdgpu_asic_reset_method(adev) (adev)->asic_funcs->reset_method((adev)) #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev)) #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d)) #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec)) @@ -1110,6 +1151,7 @@ #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) #define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev)) #define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev))) +#define amdgpu_inc_vram_lost(adev) atomic_inc(&((adev)->vram_lost_counter)); /* Common functions */ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c 2019-08-31 15:01:11.842736167 -0500 @@ -368,7 +368,8 @@ * are broken on Navi10 and Navi14. */ if (needs_flush && (adev->asic_type < CHIP_VEGA10 || - adev->asic_type == CHIP_NAVI10)) + adev->asic_type == CHIP_NAVI10 || + adev->asic_type == CHIP_NAVI14)) continue; /* Good, we can use this VMID. 
Remember this submission as diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 2019-08-31 15:01:11.842736167 -0500 @@ -408,23 +408,38 @@ break; case AMDGPU_HW_IP_VCN_DEC: type = AMD_IP_BLOCK_TYPE_VCN; - if (adev->vcn.ring_dec.sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->uvd.harvest_config & (1 << i)) + continue; + + if (adev->vcn.inst[i].ring_dec.sched.ready) + ++num_rings; + } ib_start_alignment = 16; ib_size_alignment = 16; break; case AMDGPU_HW_IP_VCN_ENC: type = AMD_IP_BLOCK_TYPE_VCN; - for (i = 0; i < adev->vcn.num_enc_rings; i++) - if (adev->vcn.ring_enc[i].sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->uvd.harvest_config & (1 << i)) + continue; + + for (j = 0; j < adev->vcn.num_enc_rings; j++) + if (adev->vcn.inst[i].ring_enc[j].sched.ready) + ++num_rings; + } ib_start_alignment = 64; ib_size_alignment = 1; break; case AMDGPU_HW_IP_VCN_JPEG: type = AMD_IP_BLOCK_TYPE_VCN; - if (adev->vcn.ring_jpeg.sched.ready) - ++num_rings; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->uvd.harvest_config & (1 << i)) + continue; + + if (adev->vcn.inst[i].ring_jpeg.sched.ready) + ++num_rings; + } ib_start_alignment = 16; ib_size_alignment = 16; break; @@ -1088,7 +1103,7 @@ amdgpu_vm_fini(adev, &fpriv->vm); if (pasid) - amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); + amdgpu_pasid_free_delayed(pd->tbo.base.resv, pasid); amdgpu_bo_unref(&pd); idr_for_each_entry(&fpriv->bo_list_handles, list, handle) diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h 2019-08-31 15:01:11.842736167 -0500 @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +#ifndef __AMDGPU_MMHUB_H__ +#define __AMDGPU_MMHUB_H__ + +struct amdgpu_mmhub_funcs { + void (*ras_init)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); +}; + +#endif + diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 2019-08-31 15:01:11.842736167 -0500 @@ -179,7 +179,7 @@ if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, end)) continue; - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, + r = reservation_object_wait_timeout_rcu(bo->tbo.base.resv, true, false, MAX_SCHEDULE_TIMEOUT); if (r <= 0) DRM_ERROR("(%ld) failed to wait for user bo\n", r); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 2019-08-31 15:01:11.842736167 -0500 @@ -80,14 +80,11 @@ if (bo->pin_count > 0) amdgpu_bo_subtract_pin_size(bo); - if (bo->kfd_bo) - amdgpu_amdkfd_unreserve_memory_limit(bo); - amdgpu_bo_kunmap(bo); - if (bo->gem_base.import_attach) - drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg); - drm_gem_object_release(&bo->gem_base); + if (bo->tbo.base.import_attach) + drm_prime_gem_destroy(&bo->tbo.base, bo->tbo.sg); + drm_gem_object_release(&bo->tbo.base); /* in case amdgpu_device_recover_vram got NULL of bo->parent */ if (!list_empty(&bo->shadow_list)) { mutex_lock(&adev->shadow_list_lock); @@ -249,8 +246,9 @@ bp.size = size; bp.byte_align = align; bp.domain = domain; - bp.flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + bp.flags = cpu_addr ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED + : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; + bp.flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; bp.type = ttm_bo_type_kernel; bp.resv = NULL; @@ -413,6 +411,40 @@ return false; } +bool amdgpu_bo_support_uswc(u64 bo_flags) +{ + +#ifdef CONFIG_X86_32 + /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit + * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 + */ + return false; +#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) + /* Don't try to enable write-combining when it can't work, or things + * may be slow + * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 + */ + +#ifndef CONFIG_COMPILE_TEST +#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ + thanks to write-combining +#endif + + if (bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) + DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " + "better performance thanks to write-combining\n"); + return false; +#else + /* For architectures that don't support WC memory, + * mask out the WC flag from the BO + */ + if (!drm_arch_can_wc_memory()) + return false; + + return true; +#endif +} + static int amdgpu_bo_do_create(struct amdgpu_device *adev, struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) @@ -454,7 +486,7 @@ bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL); if (bo == NULL) return -ENOMEM; - drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); + drm_gem_private_object_init(adev->ddev, &bo->tbo.base, size); INIT_LIST_HEAD(&bo->shadow_list); bo->vm_bo = NULL; bo->preferred_domains = bp->preferred_domain ? 
bp->preferred_domain : @@ -466,33 +498,8 @@ bo->flags = bp->flags; -#ifdef CONFIG_X86_32 - /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit - * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 - */ - bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) - /* Don't try to enable write-combining when it can't work, or things - * may be slow - * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 - */ - -#ifndef CONFIG_COMPILE_TEST -#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ - thanks to write-combining -#endif - - if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) - DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " - "better performance thanks to write-combining\n"); - bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#else - /* For architectures that don't support WC memory, - * mask out the WC flag from the BO - */ - if (!drm_arch_can_wc_memory()) + if (!amdgpu_bo_support_uswc(bo->flags)) bo->flags &= ~AMDGPU_GEM_CREATE_CPU_GTT_USWC; -#endif bo->tbo.bdev = &adev->mman.bdev; if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA | @@ -521,7 +528,7 @@ bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence); + r = amdgpu_fill_buffer(bo, 0, bo->tbo.base.resv, &fence); if (unlikely(r)) goto fail_unreserve; @@ -544,7 +551,7 @@ fail_unreserve: if (!bp->resv) - ww_mutex_unlock(&bo->tbo.resv->lock); + reservation_object_unlock(bo->tbo.base.resv); amdgpu_bo_unref(&bo); return r; } @@ -565,7 +572,7 @@ bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_SHADOW; bp.type = ttm_bo_type_kernel; - bp.resv = bo->tbo.resv; + bp.resv = bo->tbo.base.resv; r = amdgpu_bo_do_create(adev, &bp, &bo->shadow); if (!r) { @@ -606,13 +613,13 @@ if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) { if (!bp->resv) - WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, + WARN_ON(reservation_object_lock((*bo_ptr)->tbo.base.resv, NULL)); r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr); if (!bp->resv) - reservation_object_unlock((*bo_ptr)->tbo.resv); + reservation_object_unlock((*bo_ptr)->tbo.base.resv); if (r) amdgpu_bo_unref(bo_ptr); @@ -709,7 +716,7 @@ return 0; } - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false, + r = reservation_object_wait_timeout_rcu(bo->tbo.base.resv, false, false, MAX_SCHEDULE_TIMEOUT); if (r < 0) return r; @@ -1087,7 +1094,7 @@ */ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags) { - lockdep_assert_held(&bo->tbo.resv->lock.base); + reservation_object_assert_held(bo->tbo.base.resv); if (tiling_flags) *tiling_flags = bo->tiling_flags; @@ -1212,6 +1219,42 @@ } /** + * amdgpu_bo_release_notify - notification about a BO being released + * @bo: pointer to a buffer object + * + * Wipes VRAM buffers whose contents should not be leaked before the + * memory is released. 
+ */ +void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) +{ + struct dma_fence *fence = NULL; + struct amdgpu_bo *abo; + int r; + + if (!amdgpu_bo_is_amdgpu_bo(bo)) + return; + + abo = ttm_to_amdgpu_bo(bo); + + if (abo->kfd_bo) + amdgpu_amdkfd_unreserve_memory_limit(abo); + + if (bo->mem.mem_type != TTM_PL_VRAM || !bo->mem.mm_node || + !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) + return; + + reservation_object_lock(bo->base.resv, NULL); + + r = amdgpu_fill_buffer(abo, AMDGPU_POISON, bo->base.resv, &fence); + if (!WARN_ON(r)) { + amdgpu_bo_fence(abo, fence, false); + dma_fence_put(fence); + } + + reservation_object_unlock(bo->base.resv); +} + +/** * amdgpu_bo_fault_reserve_notify - notification about a memory fault * @bo: pointer to a buffer object * @@ -1283,7 +1326,7 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared) { - struct reservation_object *resv = bo->tbo.resv; + struct reservation_object *resv = bo->tbo.base.resv; if (shared) reservation_object_add_shared_fence(resv, fence); @@ -1308,7 +1351,7 @@ int r; amdgpu_sync_create(&sync); - amdgpu_sync_resv(adev, &sync, bo->tbo.resv, owner, false); + amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv, owner, false); r = amdgpu_sync_wait(&sync, intr); amdgpu_sync_free(&sync); @@ -1328,7 +1371,7 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) { WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM); - WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) && + WARN_ON_ONCE(!reservation_object_is_locked(bo->tbo.base.resv) && !bo->pin_count && bo->tbo.type != ttm_bo_type_kernel); WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET); WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM && diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 2019-08-31 15:01:11.842736167 -0500 @@ -94,7 +94,6 @@ /* per VM structure for page tables and with virtual addresses */ struct amdgpu_vm_bo_base *vm_bo; /* Constant after initialization */ - struct drm_gem_object gem_base; struct amdgpu_bo *parent; struct amdgpu_bo *shadow; @@ -192,7 +191,7 @@ */ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) { - return drm_vma_node_offset_addr(&bo->tbo.vma_node); + return drm_vma_node_offset_addr(&bo->tbo.base.vma_node); } /** @@ -265,6 +264,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict, struct ttm_mem_reg *new_mem); +void amdgpu_bo_release_notify(struct ttm_buffer_object *bo); int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared); @@ -308,5 +308,7 @@ struct seq_file *m); #endif +bool amdgpu_bo_support_uswc(u64 bo_flags); + #endif diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c 2019-08-31 15:01:11.843736167 -0500 @@ -325,13 +325,6 @@ (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (!amdgpu_sriov_vf(adev)) { - if (is_support_sw_smu(adev)) - current_level = smu_get_performance_level(&adev->smu); - else if (adev->powerplay.pp_funcs->get_performance_level) - current_level = 
amdgpu_dpm_get_performance_level(adev); - } - if (strncmp("low", buf, strlen("low")) == 0) { level = AMD_DPM_FORCED_LEVEL_LOW; } else if (strncmp("high", buf, strlen("high")) == 0) { @@ -355,17 +348,23 @@ goto fail; } - if (amdgpu_sriov_vf(adev)) { - if (amdgim_is_hwperf(adev) && - adev->virt.ops->force_dpm_level) { - mutex_lock(&adev->pm.mutex); - adev->virt.ops->force_dpm_level(adev, level); - mutex_unlock(&adev->pm.mutex); - return count; - } else { - return -EINVAL; + /* handle sriov case here */ + if (amdgpu_sriov_vf(adev)) { + if (amdgim_is_hwperf(adev) && + adev->virt.ops->force_dpm_level) { + mutex_lock(&adev->pm.mutex); + adev->virt.ops->force_dpm_level(adev, level); + mutex_unlock(&adev->pm.mutex); + return count; + } else { + return -EINVAL; } - } + } + + if (is_support_sw_smu(adev)) + current_level = smu_get_performance_level(&adev->smu); + else if (adev->powerplay.pp_funcs->get_performance_level) + current_level = amdgpu_dpm_get_performance_level(adev); if (current_level == level) return count; @@ -746,10 +745,10 @@ } /** - * DOC: ppfeatures + * DOC: pp_features * * The amdgpu driver provides a sysfs API for adjusting what powerplay - * features to be enabled. The file ppfeatures is used for this. And + * features to be enabled. The file pp_features is used for this. And * this is only available for Vega10 and later dGPUs. * * Reading back the file will show you the followings: @@ -761,7 +760,7 @@ * the corresponding bit from original ppfeature masks and input the * new ppfeature masks. */ -static ssize_t amdgpu_set_ppfeature_status(struct device *dev, +static ssize_t amdgpu_set_pp_feature_status(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -778,7 +777,7 @@ pr_debug("featuremask = 0x%llx\n", featuremask); if (is_support_sw_smu(adev)) { - ret = smu_set_ppfeature_status(&adev->smu, featuremask); + ret = smu_sys_set_pp_feature_mask(&adev->smu, featuremask); if (ret) return -EINVAL; } else if (adev->powerplay.pp_funcs->set_ppfeature_status) { @@ -790,7 +789,7 @@ return count; } -static ssize_t amdgpu_get_ppfeature_status(struct device *dev, +static ssize_t amdgpu_get_pp_feature_status(struct device *dev, struct device_attribute *attr, char *buf) { @@ -798,7 +797,7 @@ struct amdgpu_device *adev = ddev->dev_private; if (is_support_sw_smu(adev)) { - return smu_get_ppfeature_status(&adev->smu, buf); + return smu_sys_get_pp_feature_mask(&adev->smu, buf); } else if (adev->powerplay.pp_funcs->get_ppfeature_status) return amdgpu_dpm_get_ppfeature_status(adev, buf); @@ -1458,9 +1457,9 @@ static DEVICE_ATTR(mem_busy_percent, S_IRUGO, amdgpu_get_memory_busy_percent, NULL); static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL); -static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR, - amdgpu_get_ppfeature_status, - amdgpu_set_ppfeature_status); +static DEVICE_ATTR(pp_features, S_IRUGO | S_IWUSR, + amdgpu_get_pp_feature_status, + amdgpu_set_pp_feature_status); static DEVICE_ATTR(unique_id, S_IRUGO, amdgpu_get_unique_id, NULL); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, @@ -1625,20 +1624,16 @@ (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; - if (is_support_sw_smu(adev)) { - err = kstrtoint(buf, 10, &value); - if (err) - return err; + err = kstrtoint(buf, 10, &value); + if (err) + return err; + if (is_support_sw_smu(adev)) { smu_set_fan_control_mode(&adev->smu, value); } else { if (!adev->powerplay.pp_funcs->set_fan_control_mode) return -EINVAL; - err = kstrtoint(buf, 10, &value); - if (err) - return err; - 
amdgpu_dpm_set_fan_control_mode(adev, value); } @@ -2058,16 +2053,18 @@ return err; value = value / 1000000; /* convert to Watt */ + if (is_support_sw_smu(adev)) { - adev->smu.funcs->set_power_limit(&adev->smu, value); + err = smu_set_power_limit(&adev->smu, value); } else if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) { err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value); - if (err) - return err; } else { - return -EINVAL; + err = -EINVAL; } + if (err) + return err; + return count; } @@ -2352,7 +2349,9 @@ effective_mode &= ~S_IWUSR; } - if ((adev->flags & AMD_IS_APU) && + if (((adev->flags & AMD_IS_APU) || + adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */ + adev->family == AMDGPU_FAMILY_KV) && /* not implemented yet */ (attr == &sensor_dev_attr_power1_average.dev_attr.attr || attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr || attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr|| @@ -2376,6 +2375,12 @@ return 0; } + if ((adev->family == AMDGPU_FAMILY_SI || /* not implemented yet */ + adev->family == AMDGPU_FAMILY_KV) && /* not implemented yet */ + (attr == &sensor_dev_attr_in0_input.dev_attr.attr || + attr == &sensor_dev_attr_in0_label.dev_attr.attr)) + return 0; + /* only APUs have vddnb */ if (!(adev->flags & AMD_IS_APU) && (attr == &sensor_dev_attr_in1_input.dev_attr.attr || @@ -2831,10 +2836,12 @@ DRM_ERROR("failed to create device file pp_dpm_socclk\n"); return ret; } - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_dcefclk\n"); - return ret; + if (adev->asic_type != CHIP_ARCTURUS) { + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_dcefclk\n"); + return ret; + } } } if (adev->asic_type >= CHIP_VEGA20) { @@ -2844,10 +2851,12 @@ return ret; } } - ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); - if (ret) { - DRM_ERROR("failed to create device file pp_dpm_pcie\n"); - return ret; + if (adev->asic_type != CHIP_ARCTURUS) { + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_pcie\n"); + return ret; + } } ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od); if (ret) { @@ -2917,10 +2926,10 @@ if ((adev->asic_type >= CHIP_VEGA10) && !(adev->flags & AMD_IS_APU)) { ret = device_create_file(adev->dev, - &dev_attr_ppfeatures); + &dev_attr_pp_features); if (ret) { DRM_ERROR("failed to create device file " - "ppfeatures\n"); + "pp_features\n"); return ret; } } @@ -2951,9 +2960,11 @@ device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk); if (adev->asic_type >= CHIP_VEGA10) { device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk); - device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk); + if (adev->asic_type != CHIP_ARCTURUS) + device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk); } - device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); + if (adev->asic_type != CHIP_ARCTURUS) + device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); if (adev->asic_type >= CHIP_VEGA20) device_remove_file(adev->dev, &dev_attr_pp_dpm_fclk); device_remove_file(adev->dev, &dev_attr_pp_sclk_od); @@ -2974,7 +2985,7 @@ device_remove_file(adev->dev, &dev_attr_unique_id); if ((adev->asic_type >= CHIP_VEGA10) && !(adev->flags & AMD_IS_APU)) - device_remove_file(adev->dev, &dev_attr_ppfeatures); + device_remove_file(adev->dev, &dev_attr_pp_features); } void amdgpu_pm_compute_clocks(struct amdgpu_device 
*adev) diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 2019-08-31 15:01:11.843736167 -0500 @@ -32,6 +32,7 @@ #include "psp_v3_1.h" #include "psp_v10_0.h" #include "psp_v11_0.h" +#include "psp_v12_0.h" static void psp_set_funcs(struct amdgpu_device *adev); @@ -53,13 +54,19 @@ psp->autoload_supported = false; break; case CHIP_VEGA20: + case CHIP_ARCTURUS: psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = false; break; case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: psp_v11_0_set_psp_funcs(psp); psp->autoload_supported = true; break; + case CHIP_RENOIR: + psp_v12_0_set_psp_funcs(psp); + break; default: return -EINVAL; } @@ -137,8 +144,7 @@ memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); index = atomic_inc_return(&psp->fence_value); - ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr, - fence_mc_addr, index); + ret = psp_cmd_submit(psp, psp->cmd_buf_mc_addr, fence_mc_addr, index); if (ret) { atomic_dec(&psp->fence_value); mutex_unlock(&psp->mutex); @@ -162,8 +168,8 @@ if (ucode) DRM_WARN("failed to load ucode id (%d) ", ucode->ucode_id); - DRM_WARN("psp command failed and response status is (%d)\n", - psp->cmd_buf_mem->resp.status); + DRM_WARN("psp command failed and response status is (0x%X)\n", + psp->cmd_buf_mem->resp.status & GFX_CMD_STATUS_MASK); if (!timeout) { mutex_unlock(&psp->mutex); return -EINVAL; @@ -233,6 +239,8 @@ { int ret; int tmr_size; + void *tmr_buf; + void **pptr; /* * According to HW engineer, they prefer the TMR address be "naturally @@ -255,9 +263,10 @@ } } + pptr = amdgpu_sriov_vf(psp->adev) ? 
&tmr_buf : NULL; ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + &psp->tmr_bo, &psp->tmr_mc_addr, pptr); return ret; } @@ -831,7 +840,6 @@ "XGMI: Failed to initialize XGMI session\n"); } - if (psp->adev->psp.ta_fw) { ret = psp_ras_initialize(psp); if (ret) @@ -852,6 +860,24 @@ case AMDGPU_UCODE_ID_SDMA1: *type = GFX_FW_TYPE_SDMA1; break; + case AMDGPU_UCODE_ID_SDMA2: + *type = GFX_FW_TYPE_SDMA2; + break; + case AMDGPU_UCODE_ID_SDMA3: + *type = GFX_FW_TYPE_SDMA3; + break; + case AMDGPU_UCODE_ID_SDMA4: + *type = GFX_FW_TYPE_SDMA4; + break; + case AMDGPU_UCODE_ID_SDMA5: + *type = GFX_FW_TYPE_SDMA5; + break; + case AMDGPU_UCODE_ID_SDMA6: + *type = GFX_FW_TYPE_SDMA6; + break; + case AMDGPU_UCODE_ID_SDMA7: + *type = GFX_FW_TYPE_SDMA7; + break; case AMDGPU_UCODE_ID_CP_CE: *type = GFX_FW_TYPE_CP_CE; break; @@ -920,6 +946,60 @@ return 0; } +static void psp_print_fw_hdr(struct psp_context *psp, + struct amdgpu_firmware_info *ucode) +{ + struct amdgpu_device *adev = psp->adev; + const struct sdma_firmware_header_v1_0 *sdma_hdr = + (const struct sdma_firmware_header_v1_0 *) + adev->sdma.instance[ucode->ucode_id - AMDGPU_UCODE_ID_SDMA0].fw->data; + const struct gfx_firmware_header_v1_0 *ce_hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; + const struct gfx_firmware_header_v1_0 *pfp_hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; + const struct gfx_firmware_header_v1_0 *me_hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; + const struct gfx_firmware_header_v1_0 *mec_hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + const struct rlc_firmware_header_v2_0 *rlc_hdr = + (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + const struct smc_firmware_header_v1_0 *smc_hdr = + (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; + + switch (ucode->ucode_id) { + case AMDGPU_UCODE_ID_SDMA0: + case AMDGPU_UCODE_ID_SDMA1: + case AMDGPU_UCODE_ID_SDMA2: + case AMDGPU_UCODE_ID_SDMA3: + case AMDGPU_UCODE_ID_SDMA4: + case AMDGPU_UCODE_ID_SDMA5: + case AMDGPU_UCODE_ID_SDMA6: + case AMDGPU_UCODE_ID_SDMA7: + amdgpu_ucode_print_sdma_hdr(&sdma_hdr->header); + break; + case AMDGPU_UCODE_ID_CP_CE: + amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); + break; + case AMDGPU_UCODE_ID_CP_PFP: + amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); + break; + case AMDGPU_UCODE_ID_CP_ME: + amdgpu_ucode_print_gfx_hdr(&me_hdr->header); + break; + case AMDGPU_UCODE_ID_CP_MEC1: + amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); + break; + case AMDGPU_UCODE_ID_RLC_G: + amdgpu_ucode_print_rlc_hdr(&rlc_hdr->header); + break; + case AMDGPU_UCODE_ID_SMC: + amdgpu_ucode_print_smc_hdr(&smc_hdr->header); + break; + default: + break; + } +} + static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) { @@ -980,17 +1060,31 @@ if (ucode->ucode_id == AMDGPU_UCODE_ID_SMC && (psp_smu_reload_quirk(psp) || psp->autoload_supported)) continue; + if (amdgpu_sriov_vf(adev) && (ucode->ucode_id == AMDGPU_UCODE_ID_SDMA0 || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA1 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA2 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA3 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA4 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA5 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA6 + || ucode->ucode_id == AMDGPU_UCODE_ID_SDMA7 || ucode->ucode_id == AMDGPU_UCODE_ID_RLC_G)) /*skip ucode loading in SRIOV VF */ continue; + 
if (psp->autoload_supported && (ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC1_JT || ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT)) /* skip mec JT when autoload is enabled */ continue; + /* Renoir only needs to load mec jump table one time */ + if (adev->asic_type == CHIP_RENOIR && + ucode->ucode_id == AMDGPU_UCODE_ID_CP_MEC2_JT) + continue; + + psp_print_fw_hdr(psp, ucode); ret = psp_execute_np_fw_load(psp, ucode); if (ret) @@ -1115,6 +1209,8 @@ { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; + void *tmr_buf; + void **pptr; if (adev->gmc.xgmi.num_physical_nodes > 1 && psp->xgmi_context.initialized == 1) @@ -1125,7 +1221,8 @@ psp_ring_destroy(psp, PSP_RING_TYPE__KM); - amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); + pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL; + amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr); amdgpu_bo_free_kernel(&psp->fw_pri_bo, &psp->fw_pri_mc_addr, &psp->fw_pri_buf); amdgpu_bo_free_kernel(&psp->fence_buf_bo, @@ -1328,4 +1425,13 @@ .minor = 0, .rev = 0, .funcs = &psp_ip_funcs, +}; + +const struct amdgpu_ip_block_version psp_v12_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 12, + .minor = 0, + .rev = 0, + .funcs = &psp_ip_funcs, }; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h 2019-08-31 15:01:11.843736167 -0500 @@ -90,7 +90,6 @@ int (*ring_destroy)(struct psp_context *psp, enum psp_ring_type ring_type); int (*cmd_submit)(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index); bool (*compare_sram_data)(struct psp_context *psp, @@ -172,7 +171,6 @@ /* tmr buffer */ struct amdgpu_bo *tmr_bo; uint64_t tmr_mc_addr; - void *tmr_buf; /* asd firmware and buffer */ const struct firmware *asd_fw; @@ -223,8 +221,8 @@ #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) #define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type)) #define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type))) -#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \ - (psp)->funcs->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index)) +#define psp_cmd_submit(psp, cmd_mc, fence_mc, index) \ + (psp)->funcs->cmd_submit((psp), (cmd_mc), (fence_mc), (index)) #define psp_compare_sram_data(psp, ucode, type) \ (psp)->funcs->compare_sram_data((psp), (ucode), (type)) #define psp_init_microcode(psp) \ @@ -270,6 +268,7 @@ uint32_t field_val, uint32_t mask, bool check_changed); extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; +extern const struct amdgpu_ip_block_version psp_v12_0_ip_block; int psp_gpu_reset(struct amdgpu_device *adev); int psp_update_vcn_sram(struct amdgpu_device *adev, int inst_idx, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 2019-08-31 15:01:11.843736167 -0500 @@ -30,74 +30,6 @@ #include "amdgpu_ras.h" #include "amdgpu_atomfirmware.h" -struct ras_ih_data { - /* interrupt bottom half */ - struct work_struct ih_work; - int inuse; - /* IP callback */ - ras_ih_cb cb; - /* 
full of entries */ - unsigned char *ring; - unsigned int ring_size; - unsigned int element_size; - unsigned int aligned_element_size; - unsigned int rptr; - unsigned int wptr; -}; - -struct ras_fs_data { - char sysfs_name[32]; - char debugfs_name[32]; -}; - -struct ras_err_data { - unsigned long ue_count; - unsigned long ce_count; -}; - -struct ras_err_handler_data { - /* point to bad pages array */ - struct { - unsigned long bp; - struct amdgpu_bo *bo; - } *bps; - /* the count of entries */ - int count; - /* the space can place new entries */ - int space_left; - /* last reserved entry's index + 1 */ - int last_reserved; -}; - -struct ras_manager { - struct ras_common_if head; - /* reference count */ - int use; - /* ras block link */ - struct list_head node; - /* the device */ - struct amdgpu_device *adev; - /* debugfs */ - struct dentry *ent; - /* sysfs */ - struct device_attribute sysfs_attr; - int attr_inuse; - - /* fs node name */ - struct ras_fs_data fs_data; - - /* IH data */ - struct ras_ih_data ih_data; - - struct ras_err_data err_data; -}; - -struct ras_badpage { - unsigned int bp; - unsigned int size; - unsigned int flags; -}; - const char *ras_error_string[] = { "none", "parity", @@ -130,6 +62,9 @@ #define AMDGPU_RAS_FLAG_INIT_NEED_RESET 2 #define RAS_DEFAULT_FLAGS (AMDGPU_RAS_FLAG_INIT_BY_VBIOS) +/* inject address is 52 bits */ +#define RAS_UMC_INJECT_ADDR_LIMIT (0x1ULL << 52) + static int amdgpu_ras_reserve_vram(struct amdgpu_device *adev, uint64_t offset, uint64_t size, struct amdgpu_bo **bo_ptr); @@ -196,6 +131,7 @@ char err[9] = "ue"; int op = -1; int block_id; + uint32_t sub_block; u64 address, value; if (*pos) @@ -223,17 +159,23 @@ return -EINVAL; data->head.block = block_id; - data->head.type = memcmp("ue", err, 2) == 0 ? - AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE : - AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; + /* only ue and ce errors are supported */ + if (!memcmp("ue", err, 2)) + data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + else if (!memcmp("ce", err, 2)) + data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; + else + return -EINVAL; + data->op = op; if (op == 2) { - if (sscanf(str, "%*s %*s %*s %llu %llu", - &address, &value) != 2) - if (sscanf(str, "%*s %*s %*s 0x%llx 0x%llx", - &address, &value) != 2) + if (sscanf(str, "%*s %*s %*s %u %llu %llu", + &sub_block, &address, &value) != 3) + if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx", + &sub_block, &address, &value) != 3) return -EINVAL; + data->head.sub_block_index = sub_block; data->inject.address = address; data->inject.value = value; } @@ -278,7 +220,7 @@ * write the struct to the control node. * * bash: - * echo op block [error [address value]] > .../ras/ras_ctrl + * echo op block [error [sub_block address value]] > .../ras/ras_ctrl * op: disable, enable, inject * disable: only block is needed * enable: block and error are needed * inject: error, address, value are needed * error: ue, ce * ue: multi_uncorrectable * ce: single_correctable + * sub_block: sub block index, pass 0 if there is no sub block * * here are some examples for bash commands, - * echo inject umc ue 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl - * echo inject umc ce 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl + * echo inject umc ue 0x0 0x0 0x0 > /sys/kernel/debug/dri/0/ras/ras_ctrl + * echo inject umc ce 0 0 0 > /sys/kernel/debug/dri/0/ras/ras_ctrl * echo disable umc > /sys/kernel/debug/dri/0/ras/ras_ctrl * * How to check the result? 
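As a side note on the hunk above: the two-pass numeric parsing that an inject command now goes through can be exercised in isolation. A minimal userspace sketch, for illustration only (the sample command string and the main() harness are not part of the patch):

#include <stdio.h>

int main(void)
{
	const char *str = "inject umc ue 0x0 0x0 0x0";
	unsigned int sub_block;
	unsigned long long address, value;

	/* First try plain decimal operands, then fall back to the
	 * 0x-prefixed hex form, mirroring the two sscanf() calls the
	 * patch adds to the ras_ctrl parser.
	 */
	if (sscanf(str, "%*s %*s %*s %u %llu %llu",
		   &sub_block, &address, &value) != 3 &&
	    sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx",
		   &sub_block, &address, &value) != 3)
		return 1;

	printf("sub_block=%u address=0x%llx value=0x%llx\n",
	       sub_block, address, value);
	return 0;
}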
@@ -310,7 +253,6 @@ { struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private; struct ras_debug_if data; - struct amdgpu_bo *bo; int ret = 0; ret = amdgpu_ras_debugfs_ctrl_parse_data(f, buf, size, pos, &data); @@ -328,17 +270,14 @@ ret = amdgpu_ras_feature_enable(adev, &data.head, 1); break; case 2: - ret = amdgpu_ras_reserve_vram(adev, - data.inject.address, PAGE_SIZE, &bo); - if (ret) { - /* address was offset, now it is absolute.*/ - data.inject.address += adev->gmc.vram_start; - if (data.inject.address > adev->gmc.vram_end) - break; - } else - data.inject.address = amdgpu_bo_gpu_offset(bo); + if ((data.inject.address >= adev->gmc.mc_vram_size) || + (data.inject.address >= RAS_UMC_INJECT_ADDR_LIMIT)) { + ret = -EINVAL; + break; + } + + /* data.inject.address is offset instead of absolute gpu address */ ret = amdgpu_ras_error_inject(adev, &data.inject); - amdgpu_ras_release_vram(adev, &bo); break; default: ret = -EINVAL; @@ -656,14 +595,46 @@ struct ras_query_if *info) { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_err_data err_data = {0, 0, 0, NULL}; if (!obj) return -EINVAL; - /* TODO might read the register to read the count */ + + switch (info->head.block) { + case AMDGPU_RAS_BLOCK__UMC: + if (adev->umc.funcs->query_ras_error_count) + adev->umc.funcs->query_ras_error_count(adev, &err_data); + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + if (adev->umc.funcs->query_ras_error_address) + adev->umc.funcs->query_ras_error_address(adev, &err_data); + break; + case AMDGPU_RAS_BLOCK__GFX: + if (adev->gfx.funcs->query_ras_error_count) + adev->gfx.funcs->query_ras_error_count(adev, &err_data); + break; + case AMDGPU_RAS_BLOCK__MMHUB: + if (adev->mmhub_funcs->query_ras_error_count) + adev->mmhub_funcs->query_ras_error_count(adev, &err_data); + break; + default: + break; + } + + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; info->ue_count = obj->err_data.ue_count; info->ce_count = obj->err_data.ce_count; + if (err_data.ce_count) + dev_info(adev->dev, "%ld correctable errors detected in %s block\n", + obj->err_data.ce_count, ras_block_str(info->head.block)); + if (err_data.ue_count) + dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n", + obj->err_data.ue_count, ras_block_str(info->head.block)); + return 0; } @@ -684,13 +655,23 @@ if (!obj) return -EINVAL; - if (block_info.block_id != TA_RAS_BLOCK__UMC) { + switch (info->head.block) { + case AMDGPU_RAS_BLOCK__GFX: + if (adev->gfx.funcs->ras_error_inject) + ret = adev->gfx.funcs->ras_error_inject(adev, info); + else + ret = -EINVAL; + break; + case AMDGPU_RAS_BLOCK__UMC: + case AMDGPU_RAS_BLOCK__MMHUB: + ret = psp_ras_trigger_error(&adev->psp, &block_info); + break; + default: DRM_INFO("%s error injection is not supported yet\n", ras_block_str(info->head.block)); - return -EINVAL; + ret = -EINVAL; } - ret = psp_ras_trigger_error(&adev->psp, &block_info); if (ret) DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n", ras_block_str(info->head.block), @@ -707,7 +688,7 @@ } /* get the total error counts on all IPs */ -int amdgpu_ras_query_error_count(struct amdgpu_device *adev, +unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -715,7 +696,7 @@ struct ras_err_data data = {0, 0}; if (!con) - return -EINVAL; + return 0; list_for_each_entry(obj, &con->head, node) { struct ras_query_if info = { @@ 
-723,7 +704,7 @@ }; if (amdgpu_ras_error_query(adev, &info)) - return -EINVAL; + return 0; data.ce_count += info.ce_count; data.ue_count += info.ue_count; @@ -812,32 +793,8 @@ { struct amdgpu_ras *con = container_of(attr, struct amdgpu_ras, features_attr); - struct drm_device *ddev = dev_get_drvdata(dev); - struct amdgpu_device *adev = ddev->dev_private; - struct ras_common_if head; - int ras_block_count = AMDGPU_RAS_BLOCK_COUNT; - int i; - ssize_t s; - struct ras_manager *obj; - - s = scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); - for (i = 0; i < ras_block_count; i++) { - head.block = i; - - if (amdgpu_ras_is_feature_enabled(adev, &head)) { - obj = amdgpu_ras_find_obj(adev, &head); - s += scnprintf(&buf[s], PAGE_SIZE - s, - "%s: %s\n", - ras_block_str(i), - ras_err_str(obj->head.type)); - } else - s += scnprintf(&buf[s], PAGE_SIZE - s, - "%s: disabled\n", - ras_block_str(i)); - } - - return s; + return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); } static int amdgpu_ras_sysfs_create_feature_node(struct amdgpu_device *adev) @@ -1054,6 +1011,7 @@ struct ras_ih_data *data = &obj->ih_data; struct amdgpu_iv_entry entry; int ret; + struct ras_err_data err_data = {0, 0, 0, NULL}; while (data->rptr != data->wptr) { rmb(); @@ -1068,19 +1026,19 @@ * from the callback to udpate the error type/count, etc */ if (data->cb) { - ret = data->cb(obj->adev, &entry); + ret = data->cb(obj->adev, &err_data, &entry); /* ue will trigger an interrupt, and in that case * we need do a reset to recovery the whole system. * But leave IP do that recovery, here we just dispatch * the error. */ - if (ret == AMDGPU_RAS_UE) { - obj->err_data.ue_count++; + if (ret == AMDGPU_RAS_SUCCESS) { + /* these counts could be left as 0 if + * some blocks do not count error number + */ + obj->err_data.ue_count += err_data.ue_count; + obj->err_data.ce_count += err_data.ce_count; } - /* Might need get ce count by register, but not all IP - * saves ce count, some IP just use one bit or two bits - * to indicate ce happened. - */ } } } @@ -1577,6 +1535,10 @@ if (amdgpu_ras_fs_init(adev)) goto fs_out; + /* ras init for each ras block */ + if (adev->umc.funcs->ras_init) + adev->umc.funcs->ras_init(adev); + DRM_INFO("RAS INFO: ras initialized successfully, " "hardware ability[%x] ras_mask[%x]\n", con->hw_supported, con->supported); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c 2019-08-31 15:01:11.843736167 -0500 @@ -0,0 +1,493 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "amdgpu_ras_eeprom.h" +#include "amdgpu.h" +#include "amdgpu_ras.h" +#include <linux/bits.h> +#include "smu_v11_0_i2c.h" + +#define EEPROM_I2C_TARGET_ADDR 0xA0 + +/* + * The 2 macros below represent the actual size in bytes that + * those entities occupy in the EEPROM memory. + * EEPROM_TABLE_RECORD_SIZE is different from sizeof(eeprom_table_record) which + * uses uint64 to store 6b fields such as retired_page. + */ +#define EEPROM_TABLE_HEADER_SIZE 20 +#define EEPROM_TABLE_RECORD_SIZE 24 + +#define EEPROM_ADDRESS_SIZE 0x2 + +/* Table hdr is 'AMDR' */ +#define EEPROM_TABLE_HDR_VAL 0x414d4452 +#define EEPROM_TABLE_VER 0x00010000 + +/* Assume 2 Mbit size */ +#define EEPROM_SIZE_BYTES 256000 +#define EEPROM_PAGE__SIZE_BYTES 256 +#define EEPROM_HDR_START 0 +#define EEPROM_RECORD_START (EEPROM_HDR_START + EEPROM_TABLE_HEADER_SIZE) +#define EEPROM_MAX_RECORD_NUM ((EEPROM_SIZE_BYTES - EEPROM_TABLE_HEADER_SIZE) / EEPROM_TABLE_RECORD_SIZE) +#define EEPROM_ADDR_MSB_MASK GENMASK(17, 8) + +#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control))->adev + +static void __encode_table_header_to_buff(struct amdgpu_ras_eeprom_table_header *hdr, + unsigned char *buff) +{ + uint32_t *pp = (uint32_t *) buff; + + pp[0] = cpu_to_le32(hdr->header); + pp[1] = cpu_to_le32(hdr->version); + pp[2] = cpu_to_le32(hdr->first_rec_offset); + pp[3] = cpu_to_le32(hdr->tbl_size); + pp[4] = cpu_to_le32(hdr->checksum); +} + +static void __decode_table_header_from_buff(struct amdgpu_ras_eeprom_table_header *hdr, + unsigned char *buff) +{ + uint32_t *pp = (uint32_t *)buff; + + hdr->header = le32_to_cpu(pp[0]); + hdr->version = le32_to_cpu(pp[1]); + hdr->first_rec_offset = le32_to_cpu(pp[2]); + hdr->tbl_size = le32_to_cpu(pp[3]); + hdr->checksum = le32_to_cpu(pp[4]); +} + +static int __update_table_header(struct amdgpu_ras_eeprom_control *control, + unsigned char *buff) +{ + int ret = 0; + struct i2c_msg msg = { + .addr = EEPROM_I2C_TARGET_ADDR, + .flags = 0, + .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, + .buf = buff, + }; + + + *(uint16_t *)buff = EEPROM_HDR_START; + __encode_table_header_to_buff(&control->tbl_hdr, buff + EEPROM_ADDRESS_SIZE); + + ret = i2c_transfer(&control->eeprom_accessor, &msg, 1); + if (ret < 1) + DRM_ERROR("Failed to write EEPROM table header, ret:%d", ret); + + return ret; +} + +static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control); + +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) +{ + int ret = 0; + struct amdgpu_device *adev = to_amdgpu_device(control); + unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 }; + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + struct i2c_msg msg = { + .addr = EEPROM_I2C_TARGET_ADDR, + .flags = I2C_M_RD, + .len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE, + .buf = buff, + }; + + mutex_init(&control->tbl_mutex); + + switch (adev->asic_type) { + case CHIP_VEGA20: + ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor); + break; + + default: 
+int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
+{
+	int ret = 0;
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+	unsigned char buff[EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE] = { 0 };
+	struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr;
+	struct i2c_msg msg = {
+			.addr	= EEPROM_I2C_TARGET_ADDR,
+			.flags	= I2C_M_RD,
+			.len	= EEPROM_ADDRESS_SIZE + EEPROM_TABLE_HEADER_SIZE,
+			.buf	= buff,
+	};
+
+	mutex_init(&control->tbl_mutex);
+
+	switch (adev->asic_type) {
+	case CHIP_VEGA20:
+		ret = smu_v11_0_i2c_eeprom_control_init(&control->eeprom_accessor);
+		break;
+
+	default:
+		return 0;
+	}
+
+	if (ret) {
+		DRM_ERROR("Failed to init I2C controller, ret:%d", ret);
+		return ret;
+	}
+
+	/* Read/Create table header from EEPROM address 0 */
+	ret = i2c_transfer(&control->eeprom_accessor, &msg, 1);
+	if (ret < 1) {
+		DRM_ERROR("Failed to read EEPROM table header, ret:%d", ret);
+		return ret;
+	}
+
+	__decode_table_header_from_buff(hdr, &buff[2]);
+
+	if (hdr->header == EEPROM_TABLE_HDR_VAL) {
+		control->num_recs = (hdr->tbl_size - EEPROM_TABLE_HEADER_SIZE) /
+				    EEPROM_TABLE_RECORD_SIZE;
+		DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
+				 control->num_recs);
+
+	} else {
+		DRM_INFO("Creating new EEPROM table");
+
+		hdr->header = EEPROM_TABLE_HDR_VAL;
+		hdr->version = EEPROM_TABLE_VER;
+		hdr->first_rec_offset = EEPROM_RECORD_START;
+		hdr->tbl_size = EEPROM_TABLE_HEADER_SIZE;
+
+		adev->psp.ras.ras->eeprom_control.tbl_byte_sum =
+				__calc_hdr_byte_sum(&adev->psp.ras.ras->eeprom_control);
+		ret = __update_table_header(control, buff);
+	}
+
+	/* Start inserting records from here */
+	adev->psp.ras.ras->eeprom_control.next_addr = EEPROM_RECORD_START;
+
+	return ret == 1 ? 0 : -EIO;
+}
+
+void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control)
+{
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+
+	switch (adev->asic_type) {
+	case CHIP_VEGA20:
+		smu_v11_0_i2c_eeprom_control_fini(&control->eeprom_accessor);
+		break;
+
+	default:
+		return;
+	}
+}
+
+static void __encode_table_record_to_buff(struct amdgpu_ras_eeprom_control *control,
+					  struct eeprom_table_record *record,
+					  unsigned char *buff)
+{
+	__le64 tmp = 0;
+	int i = 0;
+
+	/* Next are all record fields according to EEPROM page spec in LE format */
+	buff[i++] = record->err_type;
+
+	buff[i++] = record->bank;
+
+	tmp = cpu_to_le64(record->ts);
+	memcpy(buff + i, &tmp, 8);
+	i += 8;
+
+	tmp = cpu_to_le64((record->offset & 0xffffffffffff));
+	memcpy(buff + i, &tmp, 6);
+	i += 6;
+
+	buff[i++] = record->mem_channel;
+	buff[i++] = record->mcumc_id;
+
+	tmp = cpu_to_le64((record->retired_page & 0xffffffffffff));
+	memcpy(buff + i, &tmp, 6);
+}
+
+static void __decode_table_record_from_buff(struct amdgpu_ras_eeprom_control *control,
+					    struct eeprom_table_record *record,
+					    unsigned char *buff)
+{
+	__le64 tmp = 0;
+	int i = 0;
+
+	/* Next are all record fields according to EEPROM page spec in LE format */
+	record->err_type = buff[i++];
+
+	record->bank = buff[i++];
+
+	memcpy(&tmp, buff + i, 8);
+	record->ts = le64_to_cpu(tmp);
+	i += 8;
+
+	memcpy(&tmp, buff + i, 6);
+	record->offset = (le64_to_cpu(tmp) & 0xffffffffffff);
+	i += 6;
+
+	record->mem_channel = buff[i++];
+	record->mcumc_id = buff[i++];
+
+	memcpy(&tmp, buff + i, 6);
+	record->retired_page = (le64_to_cpu(tmp) & 0xffffffffffff);
+}
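Counting the fields serialized above gives the 24 bytes of EEPROM_TABLE_RECORD_SIZE: 1 (err_type) + 1 (bank/cu) + 8 (ts) + 6 (offset) + 1 (mem_channel) + 1 (mcumc_id) + 6 (retired_page). The two 48-bit fields survive the 6-byte memcpy() because masking with 0xffffffffffff keeps exactly the bits carried by the low six bytes of the little-endian encoding. A standalone sketch of that round-trip, assuming a little-endian host for simplicity (the kernel code stays endian-safe via cpu_to_le64()/le64_to_cpu()):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	uint64_t retired_page = 0x123456789abcULL & 0xffffffffffffULL;
	unsigned char buff[6];
	uint64_t decoded = 0;

	memcpy(buff, &retired_page, 6);	/* low 6 LE bytes = bits 0-47 */
	memcpy(&decoded, buff, 6);

	printf("round-trip ok: %d\n", decoded == retired_page);	/* 1 */
	return 0;
}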
+
+/*
+ * When reaching the end of EEPROM memory, jump back to record address 0.
+ * When the next record access would go beyond an EEPROM page boundary, modify
+ * bits A17/A8 in the I2C selector to go to the next page.
+ */
+static uint32_t __correct_eeprom_dest_address(uint32_t curr_address)
+{
+	uint32_t next_address = curr_address + EEPROM_TABLE_RECORD_SIZE;
+
+	/* When all EEPROM memory is used, jump back to address 0 */
+	if (next_address > EEPROM_SIZE_BYTES) {
+		DRM_INFO("Reached end of EEPROM memory, jumping to 0 "
+			 "and overriding old record");
+		return EEPROM_RECORD_START;
+	}
+
+	/*
+	 * To check if we overflow a page boundary, compare the next address
+	 * with the current one and see if bits 17/8 of the EEPROM address
+	 * will change. If they do, start from the next 256-byte page.
+	 *
+	 * https://www.st.com/resource/en/datasheet/m24m02-dr.pdf sec. 5.1.2
+	 */
+	if ((curr_address & EEPROM_ADDR_MSB_MASK) != (next_address & EEPROM_ADDR_MSB_MASK)) {
+		DRM_DEBUG_DRIVER("Reached end of EEPROM memory page, jumping to next: %lx",
+				 (next_address & EEPROM_ADDR_MSB_MASK));
+
+		return (next_address & EEPROM_ADDR_MSB_MASK);
+	}
+
+	return curr_address;
+}
+
+
+static uint32_t __calc_hdr_byte_sum(struct amdgpu_ras_eeprom_control *control)
+{
+	int i;
+	uint32_t tbl_sum = 0;
+
+	/* Header checksum, skip checksum field in the calculation */
+	for (i = 0; i < sizeof(control->tbl_hdr) - sizeof(control->tbl_hdr.checksum); i++)
+		tbl_sum += *(((unsigned char *)&control->tbl_hdr) + i);
+
+	return tbl_sum;
+}
+
+static uint32_t __calc_recs_byte_sum(struct eeprom_table_record *records,
+				     int num)
+{
+	int i, j;
+	uint32_t tbl_sum = 0;
+
+	/* Records checksum */
+	for (i = 0; i < num; i++) {
+		struct eeprom_table_record *record = &records[i];
+
+		for (j = 0; j < sizeof(*record); j++) {
+			tbl_sum += *(((unsigned char *)record) + j);
+		}
+	}
+
+	return tbl_sum;
+}
+
+static inline uint32_t __calc_tbl_byte_sum(struct amdgpu_ras_eeprom_control *control,
+					   struct eeprom_table_record *records, int num)
+{
+	return __calc_hdr_byte_sum(control) + __calc_recs_byte_sum(records, num);
+}
+
+/* Checksum = 256 - ((sum of all table entries) mod 256) */
+static void __update_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
+				  struct eeprom_table_record *records, int num,
+				  uint32_t old_hdr_byte_sum)
+{
+	/*
+	 * This will update the table sum with new records.
+	 *
+	 * TODO: What happens when the EEPROM table is to be wrapped around
+	 * and old records from the start will get overridden.
+	 */
+
+	/* need to recalculate updated header byte sum */
+	control->tbl_byte_sum -= old_hdr_byte_sum;
+	control->tbl_byte_sum += __calc_tbl_byte_sum(control, records, num);
+
+	control->tbl_hdr.checksum = 256 - (control->tbl_byte_sum % 256);
+}
+
+/* table sum mod 256 + checksum must equal 256 */
+static bool __validate_tbl_checksum(struct amdgpu_ras_eeprom_control *control,
+				    struct eeprom_table_record *records, int num)
+{
+	control->tbl_byte_sum = __calc_tbl_byte_sum(control, records, num);
+
+	if (control->tbl_hdr.checksum + (control->tbl_byte_sum % 256) != 256) {
+		DRM_WARN("Checksum mismatch, checksum: %u ", control->tbl_hdr.checksum);
+		return false;
+	}
+
+	return true;
+}
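The two functions above implement one checksum rule: the header stores 256 - (table_byte_sum % 256), so a table validates when the stored checksum plus the recomputed sum modulo 256 equals 256. A small illustrative calculation with an arbitrary example sum:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t tbl_byte_sum = 4660;	/* arbitrary example byte sum */
	uint32_t checksum = 256 - (tbl_byte_sum % 256);

	/* 4660 % 256 == 52, so checksum == 204 and 204 + 52 == 256 */
	printf("checksum: %u valid: %d\n", checksum,
	       checksum + (tbl_byte_sum % 256) == 256);
	return 0;
}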
+int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
+				     struct eeprom_table_record *records,
+				     bool write,
+				     int num)
+{
+	int i, ret = 0;
+	struct i2c_msg *msgs;
+	unsigned char *buffs;
+	struct amdgpu_device *adev = to_amdgpu_device(control);
+
+	if (adev->asic_type != CHIP_VEGA20)
+		return 0;
+
+	buffs = kcalloc(num, EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE,
+			GFP_KERNEL);
+	if (!buffs)
+		return -ENOMEM;
+
+	mutex_lock(&control->tbl_mutex);
+
+	msgs = kcalloc(num, sizeof(*msgs), GFP_KERNEL);
+	if (!msgs) {
+		ret = -ENOMEM;
+		goto free_buff;
+	}
+
+	/* In case of overflow, just start from the beginning so as not to lose the newest records */
+	if (write && (control->next_addr + EEPROM_TABLE_RECORD_SIZE * num > EEPROM_SIZE_BYTES))
+		control->next_addr = EEPROM_RECORD_START;
+
+
+	/*
+	 * TODO: currently we make an EEPROM write for each record; this creates
+	 * internal fragmentation. Optimize the code to do full-page writes of
+	 * 256 bytes.
+	 */
+	for (i = 0; i < num; i++) {
+		unsigned char *buff = &buffs[i * (EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
+		struct eeprom_table_record *record = &records[i];
+		struct i2c_msg *msg = &msgs[i];
+
+		control->next_addr = __correct_eeprom_dest_address(control->next_addr);
+
+		/*
+		 * Update bits 16,17 of EEPROM address in I2C address by setting them
+		 * to bits 1,2 of Device address byte
+		 */
+		msg->addr = EEPROM_I2C_TARGET_ADDR |
+			       ((control->next_addr & EEPROM_ADDR_MSB_MASK) >> 15);
+		msg->flags = write ? 0 : I2C_M_RD;
+		msg->len = EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE;
+		msg->buf = buff;
+
+		/* Insert the EEPROM dest address, bits 0-15 */
+		buff[0] = ((control->next_addr >> 8) & 0xff);
+		buff[1] = (control->next_addr & 0xff);
+
+		/* EEPROM table content is stored in LE format */
+		if (write)
+			__encode_table_record_to_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
+
+		/*
+		 * The destination EEPROM address might need to be corrected to account
+		 * for page or entire memory wrapping
+		 */
+		control->next_addr += EEPROM_TABLE_RECORD_SIZE;
+	}
+
+	ret = i2c_transfer(&control->eeprom_accessor, msgs, num);
+	if (ret < 1) {
+		DRM_ERROR("Failed to process EEPROM table records, ret:%d", ret);
+
+		/* TODO: restore previous next EEPROM address? */
+		goto free_msgs;
+	}
+
+
+	if (!write) {
+		for (i = 0; i < num; i++) {
+			unsigned char *buff = &buffs[i*(EEPROM_ADDRESS_SIZE + EEPROM_TABLE_RECORD_SIZE)];
+			struct eeprom_table_record *record = &records[i];
+
+			__decode_table_record_from_buff(control, record, buff + EEPROM_ADDRESS_SIZE);
+		}
+	}
+
+	if (write) {
+		uint32_t old_hdr_byte_sum = __calc_hdr_byte_sum(control);
+
+		/*
+		 * Update table header with size and CRC and account for table
+		 * wrap around where the assumption is that we treat it as empty
+		 * table
+		 *
+		 * TODO - Check the assumption is correct
+		 */
+		control->num_recs += num;
+		control->num_recs %= EEPROM_MAX_RECORD_NUM;
+		control->tbl_hdr.tbl_size += EEPROM_TABLE_RECORD_SIZE * num;
+		if (control->tbl_hdr.tbl_size > EEPROM_SIZE_BYTES)
+			control->tbl_hdr.tbl_size = EEPROM_TABLE_HEADER_SIZE +
+					control->num_recs * EEPROM_TABLE_RECORD_SIZE;
+
+		__update_tbl_checksum(control, records, num, old_hdr_byte_sum);
+
+		__update_table_header(control, buffs);
+	} else if (!__validate_tbl_checksum(control, records, num)) {
+		DRM_WARN("EEPROM Table checksum mismatch!");
+		/* TODO Uncomment when EEPROM read/write is reliable */
+		/* ret = -EIO; */
+	}
+
+free_msgs:
+	kfree(msgs);
+
+free_buff:
+	kfree(buffs);
+
+	mutex_unlock(&control->tbl_mutex);
+
+	return ret == num ? 0 : -EIO;
+}
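One detail in the loop above is worth spelling out: only bits 0-15 of the destination address fit in the two address bytes of the message buffer, so bits 16-17 are folded into the I2C device address itself — shifting the EEPROM_ADDR_MSB_MASK-masked address right by 15 lands A16/A17 in bits 1-2 of the device address byte, matching the block-select scheme in the M24M02 datasheet cited earlier. An illustrative sketch of the arithmetic, with the GENMASK(17, 8) value spelled out and an example address whose bits 8-15 are clear:

#include <stdint.h>
#include <stdio.h>

#define EEPROM_I2C_TARGET_ADDR	0xA0
#define EEPROM_ADDR_MSB_MASK	0x3ff00	/* GENMASK(17, 8) */

int main(void)
{
	uint32_t next_addr = 0x30000;	/* bits 16 and 17 set, bits 8-15 clear */
	uint16_t i2c_addr = EEPROM_I2C_TARGET_ADDR |
			    ((next_addr & EEPROM_ADDR_MSB_MASK) >> 15);

	printf("i2c addr: 0x%x\n", i2c_addr);	/* 0xa0 | 0x6 = 0xa6 */
	return 0;
}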
+
+/* Used for testing if bugs encountered */
+#if 0
+void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control)
+{
+	int i;
+	struct eeprom_table_record *recs = kcalloc(1, sizeof(*recs), GFP_KERNEL);
+
+	if (!recs)
+		return;
+
+	for (i = 0; i < 1; i++) {
+		recs[i].address = 0xdeadbeef;
+		recs[i].retired_page = i;
+	}
+
+	if (!amdgpu_ras_eeprom_process_recods(control, recs, true, 1)) {
+
+		memset(recs, 0, sizeof(*recs) * 1);
+
+		control->next_addr = EEPROM_RECORD_START;
+
+		if (!amdgpu_ras_eeprom_process_recods(control, recs, false, 1)) {
+			for (i = 0; i < 1; i++)
+				DRM_INFO("rec.address :0x%llx, rec.retired_page :%llu",
+					 recs[i].address, recs[i].retired_page);
+		} else
+			DRM_ERROR("Failed in reading from table");
+
+	} else
+		DRM_ERROR("Failed in writing to table");
+}
+#endif
diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h
--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h	2019-08-31 15:01:11.843736167 -0500
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _AMDGPU_RAS_EEPROM_H
+#define _AMDGPU_RAS_EEPROM_H
+
+#include <linux/i2c.h>
+
+struct amdgpu_device;
+
+enum amdgpu_ras_eeprom_err_type {
+	AMDGPU_RAS_EEPROM_ERR_PLACE_HOLDER,
+	AMDGPU_RAS_EEPROM_ERR_RECOVERABLE,
+	AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE
+};
+
+struct amdgpu_ras_eeprom_table_header {
+	uint32_t header;
+	uint32_t version;
+	uint32_t first_rec_offset;
+	uint32_t tbl_size;
+	uint32_t checksum;
+}__attribute__((__packed__));
+
+struct amdgpu_ras_eeprom_control {
+	struct amdgpu_ras_eeprom_table_header tbl_hdr;
+	struct i2c_adapter eeprom_accessor;
+	uint32_t next_addr;
+	unsigned int num_recs;
+	struct mutex tbl_mutex;
+	bool bus_locked;
+	uint32_t tbl_byte_sum;
+};
+
+/*
+ * Represents single table record. Packed to be easily serialized into byte
+ * stream.
+ */ +struct eeprom_table_record { + + union { + uint64_t address; + uint64_t offset; + }; + + uint64_t retired_page; + uint64_t ts; + + enum amdgpu_ras_eeprom_err_type err_type; + + union { + unsigned char bank; + unsigned char cu; + }; + + unsigned char mem_channel; + unsigned char mcumc_id; +}__attribute__((__packed__)); + +int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control); +void amdgpu_ras_eeprom_fini(struct amdgpu_ras_eeprom_control *control); + +int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control, + struct eeprom_table_record *records, + bool write, + int num); + +void amdgpu_ras_eeprom_test(struct amdgpu_ras_eeprom_control *control); + +#endif // _AMDGPU_RAS_EEPROM_H diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h 2019-08-31 15:01:11.843736167 -0500 @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "amdgpu_psp.h" #include "ta_ras_if.h" +#include "amdgpu_ras_eeprom.h" enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, @@ -52,6 +53,236 @@ #define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST #define AMDGPU_RAS_BLOCK_MASK ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) +enum amdgpu_ras_gfx_subblock { + /* CPC */ + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0, + AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH = + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPC_UCODE, + AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1, + AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2, + AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2, + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, + AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, + /* CPF */ + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 = + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1, + AMDGPU_RAS_BLOCK__GFX_CPF_TAG, + AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG, + /* CPG */ + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ = + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG, + AMDGPU_RAS_BLOCK__GFX_CPG_TAG, + AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG, + /* GDS */ + AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + /* SPI */ + AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM, + /* SQ */ + AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D, + AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I, + AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, + AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, + /* SQC (3 ranges) */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, + /* SQC range 0 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, + 
AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + /* SQC range 1 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + /* SQC range 2 */ + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END, + /* TA */ + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO = + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, + AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, + /* TCA */ + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO = + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, + /* TCC (5 sub-ranges) */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, + /* TCC range 0 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, + AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, + AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + /* TCC range 1 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + /* TCC range 2 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, + AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN, + AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, + 
AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + /* TCC range 3 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END = + AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + /* TCC range 4 */ + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, + AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, + AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END = + AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END, + /* TCI */ + AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM, + /* TCP */ + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM = + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM, + AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO, + AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM, + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + /* TD */ + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO = + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI, + AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, + AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, + /* EA (3 sub-ranges) */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, + /* EA range 0 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM, + AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END = + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + /* EA range 1 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, + AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, + AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END = + AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + /* EA range 2 */ + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM, + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END = + AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, + AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END = + AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END, + /* UTC VM L2 bank */ + AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE, + /* UTC VM walker */ + AMDGPU_RAS_BLOCK__UTC_VML2_WALKER, + /* UTC ATC L2 2MB cache */ + AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, + /* UTC ATC L2 4KB cache */ + AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, + AMDGPU_RAS_BLOCK__GFX_MAX +}; + enum amdgpu_ras_error_type { AMDGPU_RAS_ERROR__NONE = 0, 
AMDGPU_RAS_ERROR__PARITY = 1, @@ -76,9 +307,6 @@ char name[32]; }; -typedef int (*ras_ih_cb)(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry); - struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ @@ -106,10 +334,85 @@ struct mutex recovery_lock; uint32_t flags; + + struct amdgpu_ras_eeprom_control eeprom_control; }; -/* interfaces for IP */ +struct ras_fs_data { + char sysfs_name[32]; + char debugfs_name[32]; +}; + +struct ras_err_data { + unsigned long ue_count; + unsigned long ce_count; + unsigned long err_addr_cnt; + uint64_t *err_addr; +}; +struct ras_err_handler_data { + /* point to bad pages array */ + struct { + unsigned long bp; + struct amdgpu_bo *bo; + } *bps; + /* the count of entries */ + int count; + /* the space can place new entries */ + int space_left; + /* last reserved entry's index + 1 */ + int last_reserved; +}; + +typedef int (*ras_ih_cb)(struct amdgpu_device *adev, + struct ras_err_data *err_data, + struct amdgpu_iv_entry *entry); + +struct ras_ih_data { + /* interrupt bottom half */ + struct work_struct ih_work; + int inuse; + /* IP callback */ + ras_ih_cb cb; + /* full of entries */ + unsigned char *ring; + unsigned int ring_size; + unsigned int element_size; + unsigned int aligned_element_size; + unsigned int rptr; + unsigned int wptr; +}; + +struct ras_manager { + struct ras_common_if head; + /* reference count */ + int use; + /* ras block link */ + struct list_head node; + /* the device */ + struct amdgpu_device *adev; + /* debugfs */ + struct dentry *ent; + /* sysfs */ + struct device_attribute sysfs_attr; + int attr_inuse; + + /* fs node name */ + struct ras_fs_data fs_data; + + /* IH data */ + struct ras_ih_data ih_data; + + struct ras_err_data err_data; +}; + +struct ras_badpage { + unsigned int bp; + unsigned int size; + unsigned int flags; +}; + +/* interfaces for IP */ struct ras_fs_if { struct ras_common_if head; char sysfs_name[32]; @@ -184,7 +487,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev); void amdgpu_ras_suspend(struct amdgpu_device *adev); -int amdgpu_ras_query_error_count(struct amdgpu_device *adev, +unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce); /* error handling functions */ diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 2019-08-31 15:01:11.843736167 -0500 @@ -29,7 +29,7 @@ #include /* max number of rings */ -#define AMDGPU_MAX_RINGS 24 +#define AMDGPU_MAX_RINGS 28 #define AMDGPU_MAX_GFX_RINGS 2 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h 2019-08-31 15:01:11.843736167 -0500 @@ -25,11 +25,17 @@ #define __AMDGPU_SDMA_H__ /* max number of IP instances */ -#define AMDGPU_MAX_SDMA_INSTANCES 2 +#define AMDGPU_MAX_SDMA_INSTANCES 8 enum amdgpu_sdma_irq { AMDGPU_SDMA_IRQ_INSTANCE0 = 0, AMDGPU_SDMA_IRQ_INSTANCE1, + AMDGPU_SDMA_IRQ_INSTANCE2, + AMDGPU_SDMA_IRQ_INSTANCE3, + AMDGPU_SDMA_IRQ_INSTANCE4, + AMDGPU_SDMA_IRQ_INSTANCE5, + AMDGPU_SDMA_IRQ_INSTANCE6, + AMDGPU_SDMA_IRQ_INSTANCE7, AMDGPU_SDMA_IRQ_LAST }; diff -Naur 
linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 2019-08-31 15:01:11.844736167 -0500 @@ -227,7 +227,7 @@ if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; - return drm_vma_node_verify_access(&abo->gem_base.vma_node, + return drm_vma_node_verify_access(&abo->tbo.base.vma_node, filp->private_data); } @@ -440,10 +440,26 @@ r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, new_mem->num_pages << PAGE_SHIFT, - bo->resv, &fence); + bo->base.resv, &fence); if (r) goto error; + /* clear the space being freed */ + if (old_mem->mem_type == TTM_PL_VRAM && + (ttm_to_amdgpu_bo(bo)->flags & + AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE)) { + struct dma_fence *wipe_fence = NULL; + + r = amdgpu_fill_buffer(ttm_to_amdgpu_bo(bo), AMDGPU_POISON, + NULL, &wipe_fence); + if (r) { + goto error; + } else if (wipe_fence) { + dma_fence_put(fence); + fence = wipe_fence; + } + } + /* Always block for VM page tables before committing the new location */ if (bo->type == ttm_bo_type_kernel) r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem); @@ -1478,18 +1494,18 @@ * cleanly handle page faults. */ if (bo->type == ttm_bo_type_kernel && - !reservation_object_test_signaled_rcu(bo->resv, true)) + !reservation_object_test_signaled_rcu(bo->base.resv, true)) return false; /* If bo is a KFD BO, check if the bo belongs to the current process. * If true, then return false as any KFD process needs all its BOs to * be resident to run successfully */ - flist = reservation_object_get_list(bo->resv); + flist = reservation_object_get_list(bo->base.resv); if (flist) { for (i = 0; i < flist->shared_count; ++i) { f = rcu_dereference_protected(flist->shared[i], - reservation_object_held(bo->resv)); + reservation_object_held(bo->base.resv)); if (amdkfd_fence_check_mm(f, current->mm)) return false; } @@ -1599,6 +1615,7 @@ .move = &amdgpu_bo_move, .verify_access = &amdgpu_verify_access, .move_notify = &amdgpu_bo_move_notify, + .release_notify = &amdgpu_bo_release_notify, .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify, .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, .io_mem_free = &amdgpu_ttm_io_mem_free, @@ -1721,6 +1738,7 @@ uint64_t gtt_size; int r; u64 vis_vram_limit; + void *stolen_vga_buf; mutex_init(&adev->mman.gtt_window_lock); @@ -1728,7 +1746,7 @@ r = ttm_bo_device_init(&adev->mman.bdev, &amdgpu_bo_driver, adev->ddev->anon_inode->i_mapping, - adev->need_dma32); + dma_addressing_limited(adev->dev)); if (r) { DRM_ERROR("failed initializing buffer object driver(%d).\n", r); return r; @@ -1775,7 +1793,7 @@ r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->stolen_vga_memory, - NULL, NULL); + NULL, &stolen_vga_buf); if (r) return r; DRM_INFO("amdgpu: %uM of VRAM memory ready\n", @@ -1839,8 +1857,9 @@ */ void amdgpu_ttm_late_init(struct amdgpu_device *adev) { + void *stolen_vga_buf; /* return the VGA stolen memory (if any) back to VRAM */ - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); } /** diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
2019-08-31 15:01:11.844736167 -0500 @@ -38,6 +38,8 @@ #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 +#define AMDGPU_POISON 0xd0bed0be + struct amdgpu_mman { struct ttm_bo_device bdev; bool mem_global_referenced; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c 2019-08-31 15:01:11.844736167 -0500 @@ -83,8 +83,8 @@ const struct smc_firmware_header_v2_0 *v2_hdr = container_of(v1_hdr, struct smc_firmware_header_v2_0, v1_0); - DRM_INFO("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes)); - DRM_INFO("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes)); + DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes)); + DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes)); } else { DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, version_minor); } @@ -269,6 +269,16 @@ DRM_DEBUG("kdb_size_bytes: %u\n", le32_to_cpu(psp_hdr_v1_1->kdb_size_bytes)); } + if (version_minor == 2) { + const struct psp_firmware_header_v1_2 *psp_hdr_v1_2 = + container_of(psp_hdr, struct psp_firmware_header_v1_2, v1_0); + DRM_DEBUG("kdb_header_version: %u\n", + le32_to_cpu(psp_hdr_v1_2->kdb_header_version)); + DRM_DEBUG("kdb_offset_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_2->kdb_offset_bytes)); + DRM_DEBUG("kdb_size_bytes: %u\n", + le32_to_cpu(psp_hdr_v1_2->kdb_size_bytes)); + } } else { DRM_ERROR("Unknown PSP ucode version: %u.%u\n", version_major, version_minor); @@ -350,11 +360,17 @@ case CHIP_RAVEN: case CHIP_VEGA12: case CHIP_VEGA20: + case CHIP_RENOIR: case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; + case CHIP_ARCTURUS: + return AMDGPU_FW_LOAD_DIRECT; + default: DRM_ERROR("Unknown firmware load type\n"); } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h 2019-08-31 15:01:11.844736167 -0500 @@ -90,6 +90,15 @@ uint32_t kdb_size_bytes; }; +/* version_major=1, version_minor=2 */ +struct psp_firmware_header_v1_2 { + struct psp_firmware_header_v1_0 v1_0; + uint32_t reserve[3]; + uint32_t kdb_header_version; + uint32_t kdb_offset_bytes; + uint32_t kdb_size_bytes; +}; + /* version_major=1, version_minor=0 */ struct ta_firmware_header_v1_0 { struct common_firmware_header header; @@ -262,6 +271,12 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_SDMA0 = 0, AMDGPU_UCODE_ID_SDMA1, + AMDGPU_UCODE_ID_SDMA2, + AMDGPU_UCODE_ID_SDMA3, + AMDGPU_UCODE_ID_SDMA4, + AMDGPU_UCODE_ID_SDMA5, + AMDGPU_UCODE_ID_SDMA6, + AMDGPU_UCODE_ID_SDMA7, AMDGPU_UCODE_ID_CP_CE, AMDGPU_UCODE_ID_CP_PFP, AMDGPU_UCODE_ID_CP_ME, @@ -281,6 +296,7 @@ AMDGPU_UCODE_ID_UVD1, AMDGPU_UCODE_ID_VCE, AMDGPU_UCODE_ID_VCN, + AMDGPU_UCODE_ID_VCN1, AMDGPU_UCODE_ID_DMCU_ERAM, AMDGPU_UCODE_ID_DMCU_INTV, AMDGPU_UCODE_ID_VCN0_RAM, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h 
2019-08-31 15:01:11.844736167 -0500
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2019  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __AMDGPU_UMC_H__
+#define __AMDGPU_UMC_H__
+
+/* implement 64-bit REG operations via a 32-bit interface */
+#define RREG64_UMC(reg)	(RREG32(reg) | \
+				((uint64_t)RREG32((reg) + 1) << 32))
+#define WREG64_UMC(reg, v)	\
+	do { \
+		WREG32((reg), lower_32_bits(v)); \
+		WREG32((reg) + 1, upper_32_bits(v)); \
+	} while (0)
+
+/*
+ * void (*func)(struct amdgpu_device *adev, struct ras_err_data *err_data,
+ *		uint32_t umc_reg_offset, uint32_t channel_index)
+ */
+#define amdgpu_umc_for_each_channel(func) \
+	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; \
+	uint32_t umc_inst, channel_inst, umc_reg_offset, channel_index; \
+	for (umc_inst = 0; umc_inst < adev->umc.umc_inst_num; umc_inst++) { \
+		/* enable the index mode to query error count per channel */ \
+		adev->umc.funcs->enable_umc_index_mode(adev, umc_inst); \
+		for (channel_inst = 0; \
+			channel_inst < adev->umc.channel_inst_num; \
+			channel_inst++) { \
+			/* calc the register offset according to channel instance */ \
+			umc_reg_offset = adev->umc.channel_offs * channel_inst; \
+			/* get channel index of interleaved memory */ \
+			channel_index = adev->umc.channel_idx_tbl[ \
+				umc_inst * adev->umc.channel_inst_num + channel_inst]; \
+			(func)(adev, err_data, umc_reg_offset, channel_index); \
+		} \
+	} \
+	adev->umc.funcs->disable_umc_index_mode(adev);
+
+struct amdgpu_umc_funcs {
+	void (*ras_init)(struct amdgpu_device *adev);
+	void (*query_ras_error_count)(struct amdgpu_device *adev,
+					void *ras_error_status);
+	void (*query_ras_error_address)(struct amdgpu_device *adev,
+					void *ras_error_status);
+	void (*enable_umc_index_mode)(struct amdgpu_device *adev,
+					uint32_t umc_instance);
+	void (*disable_umc_index_mode)(struct amdgpu_device *adev);
+};
+
+struct amdgpu_umc {
+	/* max error count in one ras query call */
+	uint32_t max_ras_err_cnt_per_query;
+	/* number of umc channel instance with memory map register access */
+	uint32_t channel_inst_num;
+	/* number of umc instance with memory map register access */
+	uint32_t umc_inst_num;
+	/* UMC register per channel offset */
+	uint32_t channel_offs;
+	/* channel index table of interleaved memory */
+	const uint32_t *channel_idx_tbl;
+
+	const struct amdgpu_umc_funcs *funcs;
+};
+
+#endif
diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 2019-08-31 15:01:11.844736167 -0500 @@ -1073,7 +1073,7 @@ ib->length_dw = 16; if (direct) { - r = reservation_object_wait_timeout_rcu(bo->tbo.resv, + r = reservation_object_wait_timeout_rcu(bo->tbo.base.resv, true, false, msecs_to_jiffies(10)); if (r == 0) @@ -1085,7 +1085,7 @@ if (r) goto err_free; } else { - r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv, + r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv, AMDGPU_FENCE_OWNER_UNDEFINED, false); if (r) goto err_free; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 2019-08-31 15:01:11.844736167 -0500 @@ -46,12 +46,20 @@ #define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" #define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin" #define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin" +#define FIRMWARE_ARCTURUS "amdgpu/arcturus_vcn.bin" +#define FIRMWARE_RENOIR "amdgpu/renoir_vcn.bin" #define FIRMWARE_NAVI10 "amdgpu/navi10_vcn.bin" +#define FIRMWARE_NAVI14 "amdgpu/navi14_vcn.bin" +#define FIRMWARE_NAVI12 "amdgpu/navi12_vcn.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); MODULE_FIRMWARE(FIRMWARE_RAVEN2); +MODULE_FIRMWARE(FIRMWARE_ARCTURUS); +MODULE_FIRMWARE(FIRMWARE_RENOIR); MODULE_FIRMWARE(FIRMWARE_NAVI10); +MODULE_FIRMWARE(FIRMWARE_NAVI14); +MODULE_FIRMWARE(FIRMWARE_NAVI12); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -61,7 +69,7 @@ const char *fw_name; const struct common_firmware_header *hdr; unsigned char fw_check; - int r; + int i, r; INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); @@ -74,12 +82,33 @@ else fw_name = FIRMWARE_RAVEN; break; + case CHIP_ARCTURUS: + fw_name = FIRMWARE_ARCTURUS; + break; + case CHIP_RENOIR: + fw_name = FIRMWARE_RENOIR; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; case CHIP_NAVI10: fw_name = FIRMWARE_NAVI10; if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) adev->vcn.indirect_sram = true; break; + case CHIP_NAVI14: + fw_name = FIRMWARE_NAVI14; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; + case CHIP_NAVI12: + fw_name = FIRMWARE_NAVI12; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; + break; default: return -EINVAL; } @@ -133,12 +162,18 @@ bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); - r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo, - &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); - return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, 
&adev->vcn.inst[i].vcpu_bo, + &adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); + return r; + } } if (adev->vcn.indirect_sram) { @@ -156,26 +191,30 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) { - int i; - - kvfree(adev->vcn.saved_bo); + int i, j; if (adev->vcn.indirect_sram) { amdgpu_bo_free_kernel(&adev->vcn.dpg_sram_bo, - &adev->vcn.dpg_sram_gpu_addr, - (void **)&adev->vcn.dpg_sram_cpu_addr); + &adev->vcn.dpg_sram_gpu_addr, + (void **)&adev->vcn.dpg_sram_cpu_addr); } - amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, - &adev->vcn.gpu_addr, - (void **)&adev->vcn.cpu_addr); + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + kvfree(adev->vcn.inst[j].saved_bo); - amdgpu_ring_fini(&adev->vcn.ring_dec); + amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, + &adev->vcn.inst[j].gpu_addr, + (void **)&adev->vcn.inst[j].cpu_addr); - for (i = 0; i < adev->vcn.num_enc_rings; ++i) - amdgpu_ring_fini(&adev->vcn.ring_enc[i]); + amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec); - amdgpu_ring_fini(&adev->vcn.ring_jpeg); + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); + + amdgpu_ring_fini(&adev->vcn.inst[j].ring_jpeg); + } release_firmware(adev->vcn.fw); @@ -186,21 +225,25 @@ { unsigned size; void *ptr; + int i; cancel_delayed_work_sync(&adev->vcn.idle_work); - if (adev->vcn.vcpu_bo == NULL) - return 0; - - size = amdgpu_bo_size(adev->vcn.vcpu_bo); - ptr = adev->vcn.cpu_addr; - - adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL); - if (!adev->vcn.saved_bo) - return -ENOMEM; - - memcpy_fromio(adev->vcn.saved_bo, ptr, size); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + if (adev->vcn.inst[i].vcpu_bo == NULL) + return 0; + + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = adev->vcn.inst[i].cpu_addr; + + adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL); + if (!adev->vcn.inst[i].saved_bo) + return -ENOMEM; + memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); + } return 0; } @@ -208,32 +251,36 @@ { unsigned size; void *ptr; + int i; - if (adev->vcn.vcpu_bo == NULL) - return -EINVAL; - - size = amdgpu_bo_size(adev->vcn.vcpu_bo); - ptr = adev->vcn.cpu_addr; - - if (adev->vcn.saved_bo != NULL) { - memcpy_toio(ptr, adev->vcn.saved_bo, size); - kvfree(adev->vcn.saved_bo); - adev->vcn.saved_bo = NULL; - } else { - const struct common_firmware_header *hdr; - unsigned offset; - - hdr = (const struct common_firmware_header *)adev->vcn.fw->data; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); - size -= le32_to_cpu(hdr->ucode_size_bytes); - ptr += le32_to_cpu(hdr->ucode_size_bytes); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + if (adev->vcn.inst[i].vcpu_bo == NULL) + return -EINVAL; + + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = adev->vcn.inst[i].cpu_addr; + + if (adev->vcn.inst[i].saved_bo != NULL) { + memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); + kvfree(adev->vcn.inst[i].saved_bo); + adev->vcn.inst[i].saved_bo = NULL; + } else { + const struct common_firmware_header *hdr; + unsigned offset; + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + 
offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + } + memset_io(ptr, 0, size); } - memset_io(ptr, 0, size); } - return 0; } @@ -241,35 +288,40 @@ { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, vcn.idle_work.work); - unsigned int fences = 0; - unsigned int i; + unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; + unsigned int i, j; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); - } + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); + } - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - struct dpg_pause_state new_state; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + struct dpg_pause_state new_state; - if (fences) - new_state.fw_based = VCN_DPG_STATE__PAUSE; - else - new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + if (fence[j]) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + + if (amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg)) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + else + new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) - new_state.jpeg = VCN_DPG_STATE__PAUSE; - else - new_state.jpeg = VCN_DPG_STATE__UNPAUSE; + adev->vcn.pause_dpg_mode(adev, &new_state); + } - adev->vcn.pause_dpg_mode(adev, &new_state); + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_jpeg); + fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); + fences += fence[j]; } - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec); - if (fences == 0) { amdgpu_gfx_off_ctrl(adev, true); - if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled) + if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, false); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -286,7 +338,7 @@ if (set_clocks) { amdgpu_gfx_off_ctrl(adev, false); - if (adev->asic_type < CHIP_NAVI10 && adev->pm.dpm_enabled) + if (adev->asic_type < CHIP_ARCTURUS && adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, true); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, @@ -299,14 +351,14 @@ unsigned int i; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); + fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]); } if (fences) new_state.fw_based = VCN_DPG_STATE__PAUSE; else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) + if (amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_jpeg)) new_state.jpeg = VCN_DPG_STATE__PAUSE; else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; @@ -332,7 +384,7 @@ unsigned i; int r; - WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD); + WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; @@ -340,7 +392,7 @@ amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.external.scratch9); + tmp = 
RREG32(adev->vcn.inst[ring->me].external.scratch9); if (tmp == 0xDEADBEEF) break; udelay(1); @@ -651,7 +703,7 @@ unsigned i; int r; - WREG32(adev->vcn.external.jpeg_pitch, 0xCAFEDEAD); + WREG32(adev->vcn.inst[ring->me].external.jpeg_pitch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) return r; @@ -661,7 +713,7 @@ amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.external.jpeg_pitch); + tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); if (tmp == 0xDEADBEEF) break; udelay(1); @@ -735,7 +787,7 @@ } for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.external.jpeg_pitch); + tmp = RREG32(adev->vcn.inst[ring->me].external.jpeg_pitch); if (tmp == 0xDEADBEEF) break; udelay(1); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 2019-08-31 15:09:42.578781293 -0500 @@ -30,6 +30,11 @@ #define AMDGPU_VCN_FIRMWARE_OFFSET 256 #define AMDGPU_VCN_MAX_ENC_RINGS 3 +#define AMDGPU_MAX_VCN_INSTANCES 2 + +#define AMDGPU_VCN_HARVEST_VCN0 (1 << 0) +#define AMDGPU_VCN_HARVEST_VCN1 (1 << 1) + #define VCN_DEC_KMD_CMD 0x80000000 #define VCN_DEC_CMD_FENCE 0x00000000 #define VCN_DEC_CMD_TRAP 0x00000001 @@ -146,34 +151,49 @@ unsigned data1; unsigned cmd; unsigned nop; + unsigned context_id; + unsigned ib_vmid; + unsigned ib_bar_low; + unsigned ib_bar_high; + unsigned ib_size; + unsigned gp_scratch8; unsigned scratch9; unsigned jpeg_pitch; }; -struct amdgpu_vcn { +struct amdgpu_vcn_inst { struct amdgpu_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; - unsigned fw_version; void *saved_bo; - struct delayed_work idle_work; - const struct firmware *fw; /* VCN firmware */ struct amdgpu_ring ring_dec; struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; struct amdgpu_ring ring_jpeg; struct amdgpu_irq_src irq; + struct amdgpu_vcn_reg external; +}; + +struct amdgpu_vcn { + unsigned fw_version; + struct delayed_work idle_work; + const struct firmware *fw; /* VCN firmware */ unsigned num_enc_rings; enum amd_powergating_state cur_state; struct dpg_pause_state pause_state; - struct amdgpu_vcn_reg internal, external; - int (*pause_dpg_mode)(struct amdgpu_device *adev, - struct dpg_pause_state *new_state); bool indirect_sram; struct amdgpu_bo *dpg_sram_bo; void *dpg_sram_cpu_addr; uint64_t dpg_sram_gpu_addr; uint32_t *dpg_sram_curr_addr; + + uint8_t num_vcn_inst; + struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; + struct amdgpu_vcn_reg internal; + + unsigned harvest_config; + int (*pause_dpg_mode)(struct amdgpu_device *adev, + struct dpg_pause_state *new_state); }; int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c 2019-08-31 15:01:11.844736167 -0500 @@ -430,48 +430,3 @@ return clk; } - -void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev) -{ - struct amdgpu_virt *virt = &adev->virt; - - if (virt->ops && virt->ops->init_reg_access_mode) - virt->ops->init_reg_access_mode(adev); -} - -bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev) -{ - bool ret = false; - struct amdgpu_virt *virt = &adev->virt; - - 
if (amdgpu_sriov_vf(adev) - && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH)) - ret = true; - - return ret; -} - -bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev) -{ - bool ret = false; - struct amdgpu_virt *virt = &adev->virt; - - if (amdgpu_sriov_vf(adev) - && (virt->reg_access_mode & AMDGPU_VIRT_REG_ACCESS_RLC) - && !(amdgpu_sriov_runtime(adev))) - ret = true; - - return ret; -} - -bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev) -{ - bool ret = false; - struct amdgpu_virt *virt = &adev->virt; - - if (amdgpu_sriov_vf(adev) - && (virt->reg_access_mode & AMDGPU_VIRT_REG_SKIP_SEETING)) - ret = true; - - return ret; -} diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 2019-08-31 15:01:11.844736167 -0500 @@ -48,12 +48,6 @@ uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE]; }; -/* According to the fw feature, some new reg access modes are supported */ -#define AMDGPU_VIRT_REG_ACCESS_LEGACY (1 << 0) /* directly mmio */ -#define AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH (1 << 1) /* by PSP */ -#define AMDGPU_VIRT_REG_ACCESS_RLC (1 << 2) /* by RLC */ -#define AMDGPU_VIRT_REG_SKIP_SEETING (1 << 3) /* Skip setting reg */ - /** * struct amdgpu_virt_ops - amdgpu device virt operations */ @@ -65,7 +59,6 @@ void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); int (*get_pp_clk)(struct amdgpu_device *adev, u32 type, char *buf); int (*force_dpm_level)(struct amdgpu_device *adev, u32 level); - void (*init_reg_access_mode)(struct amdgpu_device *adev); }; /* @@ -315,10 +308,4 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); uint32_t amdgpu_virt_get_sclk(struct amdgpu_device *adev, bool lowest); uint32_t amdgpu_virt_get_mclk(struct amdgpu_device *adev, bool lowest); - -void amdgpu_virt_init_reg_access_mode(struct amdgpu_device *adev); -bool amdgpu_virt_support_psp_prg_ih_reg(struct amdgpu_device *adev); -bool amdgpu_virt_support_rlc_prg_reg(struct amdgpu_device *adev); -bool amdgpu_virt_support_skip_setting(struct amdgpu_device *adev); - #endif diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 2019-08-31 15:01:11.845736167 -0500 @@ -302,7 +302,7 @@ base->next = bo->vm_bo; bo->vm_bo = base; - if (bo->tbo.resv != vm->root.base.bo->tbo.resv) + if (bo->tbo.base.resv != vm->root.base.bo->tbo.base.resv) return; vm->bulk_moveable = false; @@ -583,7 +583,7 @@ for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - if (abo->tbo.resv == vm->root.base.bo->tbo.resv) + if (abo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) vm->bulk_moveable = false; } @@ -834,7 +834,7 @@ bp->flags |= AMDGPU_GEM_CREATE_SHADOW; bp->type = ttm_bo_type_kernel; if (vm->root.base.bo) - bp->resv = vm->root.base.bo->tbo.resv; + bp->resv = vm->root.base.bo->tbo.base.resv; } /** @@ -1574,7 +1574,7 @@ flags &= ~AMDGPU_PTE_EXECUTABLE; flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; - if (adev->asic_type == CHIP_NAVI10) { + if (adev->asic_type >= CHIP_NAVI10) { flags &= ~AMDGPU_PTE_MTYPE_NV10_MASK; flags |= (mapping->flags & AMDGPU_PTE_MTYPE_NV10_MASK); 
} else { @@ -1702,7 +1702,7 @@ ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm); pages_addr = ttm->dma_address; } - exclusive = reservation_object_get_excl(bo->tbo.resv); + exclusive = reservation_object_get_excl(bo->tbo.base.resv); } if (bo) { @@ -1712,7 +1712,7 @@ flags = 0x0; } - if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv)) + if (clear || (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)) last_update = &vm->last_update; else last_update = &bo_va->last_pt_update; @@ -1743,7 +1743,7 @@ * the evicted list so that it gets validated again on the * next command submission. */ - if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) { + if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { uint32_t mem_type = bo->tbo.mem.mem_type; if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) @@ -1879,7 +1879,7 @@ */ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct reservation_object *resv = vm->root.base.bo->tbo.resv; + struct reservation_object *resv = vm->root.base.bo->tbo.base.resv; struct dma_fence *excl, **shared; unsigned i, shared_count; int r; @@ -1993,7 +1993,7 @@ while (!list_empty(&vm->invalidated)) { bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, base.vm_status); - resv = bo_va->base.bo->tbo.resv; + resv = bo_va->base.bo->tbo.base.resv; spin_unlock(&vm->invalidated_lock); /* Try to reserve the BO to avoid clearing its ptes */ @@ -2084,7 +2084,7 @@ if (mapping->flags & AMDGPU_PTE_PRT) amdgpu_vm_prt_get(adev); - if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv && + if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv && !bo_va->base.moved) { list_move(&bo_va->base.vm_status, &vm->moved); } @@ -2416,7 +2416,8 @@ struct amdgpu_bo *bo; bo = mapping->bo_va->base.bo; - if (READ_ONCE(bo->tbo.resv->lock.ctx) != ticket) + if (reservation_object_locking_ctx(bo->tbo.base.resv) != + ticket) continue; } @@ -2443,7 +2444,7 @@ struct amdgpu_vm_bo_base **base; if (bo) { - if (bo->tbo.resv == vm->root.base.bo->tbo.resv) + if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) vm->bulk_moveable = false; for (base = &bo_va->base.bo->vm_bo; *base; @@ -2507,7 +2508,7 @@ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { + if (evicted && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { amdgpu_vm_bo_evicted(bo_base); continue; } @@ -2518,7 +2519,7 @@ if (bo->tbo.type == ttm_bo_type_kernel) amdgpu_vm_bo_relocated(bo_base); - else if (bo->tbo.resv == vm->root.base.bo->tbo.resv) + else if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) amdgpu_vm_bo_moved(bo_base); else amdgpu_vm_bo_invalidated(bo_base); @@ -2648,7 +2649,7 @@ */ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) { - return reservation_object_wait_timeout_rcu(vm->root.base.bo->tbo.resv, + return reservation_object_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv, true, true, timeout); } @@ -2723,7 +2724,7 @@ if (r) goto error_free_root; - r = reservation_object_reserve_shared(root->tbo.resv, 1); + r = reservation_object_reserve_shared(root->tbo.base.resv, 1); if (r) goto error_unreserve; @@ -2862,6 +2863,13 @@ WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); + if (vm->use_cpu_for_update) + vm->update_funcs = &amdgpu_vm_cpu_funcs; + else + vm->update_funcs = &amdgpu_vm_sdma_funcs; + 
dma_fence_put(vm->last_update); + vm->last_update = NULL; + if (vm->pasid) { unsigned long flags; @@ -3060,12 +3068,12 @@ switch (args->in.op) { case AMDGPU_VM_OP_RESERVE_VMID: /* current, we only have requirement to reserve vmid from gfxhub */ - r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); + r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); if (r) return r; break; case AMDGPU_VM_OP_UNRESERVE_VMID: - amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB); + amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); break; default: return -EINVAL; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 2019-08-31 15:01:11.845736167 -0500 @@ -90,7 +90,7 @@ | AMDGPU_PTE_WRITEABLE \ | AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_CC)) -/* NAVI10 only */ +/* gfx10 */ #define AMDGPU_PTE_MTYPE_NV10(a) ((uint64_t)(a) << 48) #define AMDGPU_PTE_MTYPE_NV10_MASK AMDGPU_PTE_MTYPE_NV10(7ULL) @@ -100,9 +100,10 @@ #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 /* max number of VMHUB */ -#define AMDGPU_MAX_VMHUBS 2 -#define AMDGPU_GFXHUB 0 -#define AMDGPU_MMHUB 1 +#define AMDGPU_MAX_VMHUBS 3 +#define AMDGPU_GFXHUB_0 0 +#define AMDGPU_MMHUB_0 1 +#define AMDGPU_MMHUB_1 2 /* hardcode that limit for now */ #define AMDGPU_VA_RESERVED_SIZE (1ULL << 20) diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c 2019-08-31 15:01:11.845736167 -0500 @@ -72,7 +72,7 @@ if (r) return r; - r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv, + r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.base.resv, owner, false); if (r) return r; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c 2019-08-31 15:01:11.845736167 -0500 @@ -25,7 +25,7 @@ #include "amdgpu.h" #include "amdgpu_xgmi.h" #include "amdgpu_smu.h" - +#include "df/df_3_6_offset.h" static DEFINE_MUTEX(xgmi_mutex); @@ -131,9 +131,37 @@ } +#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801) +static ssize_t amdgpu_xgmi_show_error(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + uint32_t ficaa_pie_ctl_in, ficaa_pie_status_in; + uint64_t fica_out; + unsigned int error_count = 0; + + ficaa_pie_ctl_in = AMDGPU_XGMI_SET_FICAA(0x200); + ficaa_pie_status_in = AMDGPU_XGMI_SET_FICAA(0x208); -static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); + fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_ctl_in); + if (fica_out != 0x1f) + pr_err("xGMI error counters not enabled!\n"); + + fica_out = adev->df_funcs->get_fica(adev, ficaa_pie_status_in); + + if ((fica_out & 0xffff) == 2) + error_count = ((fica_out >> 62) & 0x1) + (fica_out >> 63); + adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); + + return snprintf(buf, PAGE_SIZE, "%d\n", error_count); +} + + +static DEVICE_ATTR(xgmi_device_id, 
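/*
 * amdgpu_xgmi_show_error above reads the fabric error count through the
 * DF's indirect FICAA window: AMDGPU_XGMI_SET_FICAA() ORs the control or
 * status offset (0x200 / 0x208) with the 0x456801 access-enable bits, and
 * the 64-bit status word carries the error indication in its top two bits.
 * A condensed sketch of the decode (field layout inferred from this code,
 * not from a published register spec):
 *
 *   u64 status = adev->df_funcs->get_fica(adev, ficaa_pie_status_in);
 *   if ((status & 0xffff) == 2)                      /* counters armed */
 *           error_count = ((status >> 62) & 0x1) + (status >> 63);
 *   adev->df_funcs->set_fica(adev, ficaa_pie_status_in, 0, 0); /* clear */
 */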
S_IRUGO, amdgpu_xgmi_show_device_id, NULL); +static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL); static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, struct amdgpu_hive_info *hive) @@ -148,6 +176,12 @@ return ret; } + /* Create xgmi error file */ + ret = device_create_file(adev->dev, &dev_attr_xgmi_error); + if (ret) + pr_err("failed to create xgmi_error\n"); + + /* Create sysfs link to hive info folder on the first device */ if (adev != hive->adev) { ret = sysfs_create_link(&adev->dev->kobj, hive->kobj, @@ -248,7 +282,7 @@ dev_dbg(adev->dev, "Set xgmi pstate %d.\n", pstate); - if (is_support_sw_smu(adev)) + if (is_support_sw_smu_xgmi(adev)) ret = smu_set_xgmi_pstate(&adev->smu, pstate); if (ret) dev_err(adev->dev, @@ -296,23 +330,28 @@ struct amdgpu_xgmi *entry; struct amdgpu_device *tmp_adev = NULL; - int count = 0, ret = -EINVAL; + int count = 0, ret = 0; if (!adev->gmc.xgmi.supported) return 0; - ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); - if (ret) { - dev_err(adev->dev, - "XGMI: Failed to get node id\n"); - return ret; - } + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { + ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); + if (ret) { + dev_err(adev->dev, + "XGMI: Failed to get hive id\n"); + return ret; + } - ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); - if (ret) { - dev_err(adev->dev, - "XGMI: Failed to get hive id\n"); - return ret; + ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); + if (ret) { + dev_err(adev->dev, + "XGMI: Failed to get node id\n"); + return ret; + } + } else { + adev->gmc.xgmi.hive_id = 16; + adev->gmc.xgmi.node_id = adev->gmc.xgmi.physical_node_id + 16; } hive = amdgpu_get_xgmi_hive(adev, 1); @@ -332,29 +371,32 @@ top_info->num_nodes = count; hive->number_devices = count; - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - /* update node list for other device in the hive */ - if (tmp_adev != adev) { - top_info = &tmp_adev->psp.xgmi_context.top_info; - top_info->nodes[count - 1].node_id = adev->gmc.xgmi.node_id; - top_info->num_nodes = count; + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP)) { + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + /* update node list for other device in the hive */ + if (tmp_adev != adev) { + top_info = &tmp_adev->psp.xgmi_context.top_info; + top_info->nodes[count - 1].node_id = + adev->gmc.xgmi.node_id; + top_info->num_nodes = count; + } + ret = amdgpu_xgmi_update_topology(hive, tmp_adev); + if (ret) + goto exit; } - ret = amdgpu_xgmi_update_topology(hive, tmp_adev); - if (ret) - goto exit; - } - /* get latest topology info for each device from psp */ - list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, - &tmp_adev->psp.xgmi_context.top_info); - if (ret) { - dev_err(tmp_adev->dev, - "XGMI: Get topology failure on device %llx, hive %llx, ret %d", - tmp_adev->gmc.xgmi.node_id, - tmp_adev->gmc.xgmi.hive_id, ret); - /* To do : continue with some node failed or disable the whole hive */ - goto exit; + /* get latest topology info for each device from psp */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, + &tmp_adev->psp.xgmi_context.top_info); + if (ret) { + dev_err(tmp_adev->dev, + "XGMI: Get topology failure on device %llx, hive %llx, ret %d", + tmp_adev->gmc.xgmi.node_id, + tmp_adev->gmc.xgmi.hive_id, 
ret); + /* To do : continue with some node failed or disable the whole hive */ + goto exit; + } } } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/arct_reg_init.c 2019-08-31 15:01:11.845736167 -0500 @@ -0,0 +1,59 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "soc15.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "arct_ip_offset.h" + +int arct_reg_base_init(struct amdgpu_device *adev) +{ + /* HW has more IP blocks, only initialized the block needed by our driver */ + uint32_t i; + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i])); + adev->reg_offset[SDMA2_HWIP][i] = (uint32_t *)(&(SDMA2_BASE.instance[i])); + adev->reg_offset[SDMA3_HWIP][i] = (uint32_t *)(&(SDMA3_BASE.instance[i])); + adev->reg_offset[SDMA4_HWIP][i] = (uint32_t *)(&(SDMA4_BASE.instance[i])); + adev->reg_offset[SDMA5_HWIP][i] = (uint32_t *)(&(SDMA5_BASE.instance[i])); + adev->reg_offset[SDMA6_HWIP][i] = (uint32_t *)(&(SDMA6_BASE.instance[i])); + adev->reg_offset[SDMA7_HWIP][i] = (uint32_t *)(&(SDMA7_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + } + return 0; +} + + diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c 
linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/athub_v1_0.c 2019-08-31 15:01:11.845736167 -0500 @@ -0,0 +1,103 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "athub_v1_0.h" + +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "vega10_enum.h" + +#include "soc15_common.h" + +static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; + + if (def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && + (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + else + data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; + + if(def != data) + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); +} + +int athub_v1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_VEGA20: + case CHIP_RAVEN: + athub_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + athub_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? 
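/*
 * The two athub_v1_0 helpers above follow the usual amdgpu clock-gating
 * idiom: snapshot the register into both "def" and "data", toggle the
 * feature mask in the working copy, and only issue the MMIO write when the
 * value actually changed, sparing a register write on already-configured
 * hardware. A minimal sketch of the pattern:
 *
 *   def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL);
 *   if (enable)
 *           data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK;
 *   else
 *           data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK;
 *   if (def != data)
 *           WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data);
 */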
true : false); + break; + default: + break; + } + + return 0; +} + +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ + int data; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_ATHUB_MGCG */ + data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); + if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_MGCG; + + /* AMD_CG_SUPPORT_ATHUB_LS */ + if (data & ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_ATHUB_LS; +} diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/athub_v1_0.h 2019-08-31 15:01:11.845736167 -0500 @@ -0,0 +1,30 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __ATHUB_V1_0_H__ +#define __ATHUB_V1_0_H__ + +int athub_v1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void athub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); + +#endif diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c 2019-08-31 15:01:11.845736167 -0500 @@ -74,6 +74,8 @@ switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: athub_v2_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false); athub_v2_0_update_medium_grain_light_sleep(adev, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/cik.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/cik.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/cik.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/cik.c 2019-08-31 15:01:11.845736167 -0500 @@ -1291,6 +1291,12 @@ return r; } +static enum amd_reset_method +cik_asic_reset_method(struct amdgpu_device *adev) +{ + return AMD_RESET_METHOD_LEGACY; +} + static u32 cik_get_config_memsize(struct amdgpu_device *adev) { return RREG32(mmCONFIG_MEMSIZE); @@ -1823,6 +1829,7 @@ .read_bios_from_rom = &cik_read_bios_from_rom, .read_register = &cik_read_register, .reset = &cik_asic_reset, + .reset_method = &cik_asic_reset_method, .set_vga_state = &cik_vga_set_state, .get_xclk = &cik_get_xclk, .set_uvd_clocks = &cik_set_uvd_clocks, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c 2019-08-31 15:01:11.846736167 -0500 @@ -236,6 +236,7 @@ int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; u32 tmp; /* flip at hsync for async, default is vsync */ @@ -243,6 +244,9 @@ tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0); WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); + /* update pitch */ + WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); /* update the primary scanout address */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c 2019-08-31 15:01:11.846736167 -0500 @@ -254,6 +254,7 @@ int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; u32 tmp; /* flip immediate for async, default is vsync */ @@ -261,6 +262,9 @@ tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL, GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0); WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp); + /* update pitch */ + WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); /* update the scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c 2019-08-31 15:01:11.846736167 -0500 @@ -191,10 +191,14 @@ int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; /* flip at hsync for async, default is vsync */ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ? 
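/*
 * Each of these DCE page-flip hunks adds the same fix: reprogram GRPH_PITCH
 * on flip. The register takes the scanout pitch in pixels, while
 * drm_framebuffer stores pitches[] in bytes, hence the division by
 * format->cpp[0] (bytes per pixel). For example, a linear 1920x1080
 * XRGB8888 framebuffer has pitches[0] = 1920 * 4 = 7680 bytes and
 * cpp[0] = 4, so GRPH_PITCH is written with 1920.
 */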
GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0); + /* update pitch */ + WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); /* update the scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c 2019-08-31 15:01:11.846736167 -0500 @@ -184,10 +184,14 @@ int crtc_id, u64 crtc_base, bool async) { struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id]; + struct drm_framebuffer *fb = amdgpu_crtc->base.primary->fb; /* flip at hsync for async, default is vsync */ WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, async ? GRPH_FLIP_CONTROL__GRPH_SURFACE_UPDATE_H_RETRACE_EN_MASK : 0); + /* update pitch */ + WREG32(mmGRPH_PITCH + amdgpu_crtc->crtc_offset, + fb->pitches[0] / fb->format->cpp[0]); /* update the primary scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_virtual.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_virtual.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/dce_virtual.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/dce_virtual.c 2019-08-31 15:01:11.846736167 -0500 @@ -454,13 +454,8 @@ #endif /* no DCE */ break; - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_NAVI10: - break; default: - DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type); + break; } return 0; } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/df_v3_6.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/df_v3_6.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/df_v3_6.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/df_v3_6.c 2019-08-31 15:01:11.846736167 -0500 @@ -93,6 +93,96 @@ NULL }; +static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev, + uint32_t ficaa_val) +{ + unsigned long flags, address, data; + uint32_t ficadl_val, ficadh_val; + + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); + WREG32(data, ficaa_val); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3); + ficadl_val = RREG32(data); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); + ficadh_val = RREG32(data); + + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + + return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val); +} + +static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val, + uint32_t ficadl_val, uint32_t ficadh_val) +{ + unsigned long flags, address, data; + + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3); + WREG32(data, ficaa_val); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo3); + WREG32(data, ficadl_val); + + WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3); + WREG32(data, ficadh_val); + + 
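/*
 * Both FICAA helpers above drive the same indirect index/data pair: a
 * write to the PCIE index register selects which DF register subsequent
 * accesses to the data register hit, so the whole sequence must stay under
 * pcie_idx_lock or a concurrent indexed access could retarget it
 * mid-stream. A minimal sketch of one such indirect read ("reg" being a
 * hypothetical register offset):
 *
 *   spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 *   WREG32(address, reg);   /* select the indirect target */
 *   val = RREG32(data);     /* access it through the data port */
 *   spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 */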
spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/* + * df_v3_6_perfmon_rreg - read perfmon lo and hi + * + * required to be atomic. no mmio method provided so subsequent reads for lo + * and hi require to preserve df finite state machine + */ +static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev, + uint32_t lo_addr, uint32_t *lo_val, + uint32_t hi_addr, uint32_t *hi_val) +{ + unsigned long flags, address, data; + + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, lo_addr); + *lo_val = RREG32(data); + WREG32(address, hi_addr); + *hi_val = RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + +/* + * df_v3_6_perfmon_wreg - write to perfmon lo and hi + * + * required to be atomic. no mmio method provided so subsequent reads after + * data writes cannot occur to preserve data fabrics finite state machine. + */ +static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr, + uint32_t lo_val, uint32_t hi_addr, uint32_t hi_val) +{ + unsigned long flags, address, data; + + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + WREG32(address, lo_addr); + WREG32(data, lo_val); + WREG32(address, hi_addr); + WREG32(data, hi_val); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + /* get the number of df counters available */ static ssize_t df_v3_6_get_df_cntr_avail(struct device *dev, struct device_attribute *attr, @@ -268,6 +358,10 @@ uint32_t *lo_val, uint32_t *hi_val) { + + uint32_t eventsel, instance, unitmask; + uint32_t instance_10, instance_5432, instance_76; + df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, hi_base_addr); if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { @@ -276,40 +370,33 @@ return -ENXIO; } - if (lo_val && hi_val) { - uint32_t eventsel, instance, unitmask; - uint32_t instance_10, instance_5432, instance_76; - - eventsel = DF_V3_6_GET_EVENT(config) & 0x3f; - unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf; - instance = DF_V3_6_GET_INSTANCE(config); - - instance_10 = instance & 0x3; - instance_5432 = (instance >> 2) & 0xf; - instance_76 = (instance >> 6) & 0x3; + eventsel = DF_V3_6_GET_EVENT(config) & 0x3f; + unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf; + instance = DF_V3_6_GET_INSTANCE(config); + + instance_10 = instance & 0x3; + instance_5432 = (instance >> 2) & 0xf; + instance_76 = (instance >> 6) & 0x3; - *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel; - *hi_val = (instance_76 << 29) | instance_5432; - } + *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22); + *hi_val = (instance_76 << 29) | instance_5432; + + DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", + config, *lo_base_addr, *hi_base_addr, *lo_val, *hi_val); return 0; } -/* assign df performance counters for read */ -static int df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev, - uint64_t config, - int *is_assigned) +/* add df performance counters for read */ +static int df_v3_6_pmc_add_cntr(struct amdgpu_device *adev, + uint64_t config) { int i, target_cntr; - *is_assigned = 0; - target_cntr = df_v3_6_pmc_config_2_cntr(adev, config); - if (target_cntr >= 0) { - *is_assigned = 1; + if (target_cntr >= 0) return 0; - } for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) { if (adev->df_perfmon_config_assign_mask[i] == 0U) { @@ -344,45 +431,13 @@ if ((lo_base_addr == 0) || 
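/*
 * The ctrl-settings rework above scatters the DF perfmon "instance" field
 * across both control words: lo_val packs eventsel into bits [5:0],
 * instance[1:0] into [7:6], unitmask into [11:8], plus the counter-enable
 * bit 22 (the bit the old pmc_start/pmc_stop code toggled via
 * read-modify-write over RREG32_PCIE); hi_val packs instance[5:2] into
 * [3:0] and instance[7:6] into [30:29]. Worked example: instance 0x47
 * splits into instance_10 = 0x3, instance_5432 = 0x1 and instance_76 = 0x1.
 * Baking the enable bit into lo_val lets pmc_start program config and
 * enable through a single df_v3_6_perfmon_wreg() pair, which keeps the DF
 * state machine intact as the new helpers' comments require.
 */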
(hi_base_addr == 0)) return; - WREG32_PCIE(lo_base_addr, 0UL); - WREG32_PCIE(hi_base_addr, 0UL); -} - - -static int df_v3_6_add_perfmon_cntr(struct amdgpu_device *adev, - uint64_t config) -{ - uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; - int ret, is_assigned; - - ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned); - - if (ret || is_assigned) - return ret; - - ret = df_v3_6_pmc_get_ctrl_settings(adev, - config, - &lo_base_addr, - &hi_base_addr, - &lo_val, - &hi_val); - - if (ret) - return ret; - - DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", - config, lo_base_addr, hi_base_addr, lo_val, hi_val); - - WREG32_PCIE(lo_base_addr, lo_val); - WREG32_PCIE(hi_base_addr, hi_val); - - return ret; + df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0); } static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config, int is_enable) { - uint32_t lo_base_addr, hi_base_addr, lo_val; + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; int ret = 0; switch (adev->asic_type) { @@ -391,24 +446,20 @@ df_v3_6_reset_perfmon_cntr(adev, config); if (is_enable) { - ret = df_v3_6_add_perfmon_cntr(adev, config); + ret = df_v3_6_pmc_add_cntr(adev, config); } else { ret = df_v3_6_pmc_get_ctrl_settings(adev, config, &lo_base_addr, &hi_base_addr, - NULL, - NULL); + &lo_val, + &hi_val); if (ret) return ret; - lo_val = RREG32_PCIE(lo_base_addr); - - DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", - config, lo_base_addr, hi_base_addr, lo_val); - - WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22)); + df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val, + hi_base_addr, hi_val); } break; @@ -422,7 +473,7 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config, int is_disable) { - uint32_t lo_base_addr, hi_base_addr, lo_val; + uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val; int ret = 0; switch (adev->asic_type) { @@ -431,18 +482,13 @@ config, &lo_base_addr, &hi_base_addr, - NULL, - NULL); + &lo_val, + &hi_val); if (ret) return ret; - lo_val = RREG32_PCIE(lo_base_addr); - - DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x", - config, lo_base_addr, hi_base_addr, lo_val); - - WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22)); + df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0); if (is_disable) df_v3_6_pmc_release_cntr(adev, config); @@ -471,8 +517,8 @@ if ((lo_base_addr == 0) || (hi_base_addr == 0)) return; - lo_val = RREG32_PCIE(lo_base_addr); - hi_val = RREG32_PCIE(hi_base_addr); + df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val, + hi_base_addr, &hi_val); *count = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL); @@ -480,7 +526,7 @@ *count = 0; DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x", - config, lo_base_addr, hi_base_addr, lo_val, hi_val); + config, lo_base_addr, hi_base_addr, lo_val, hi_val); break; @@ -499,5 +545,7 @@ .get_clockgating_state = df_v3_6_get_clockgating_state, .pmc_start = df_v3_6_pmc_start, .pmc_stop = df_v3_6_pmc_stop, - .pmc_get_count = df_v3_6_pmc_get_count + .pmc_get_count = df_v3_6_pmc_get_count, + .get_fica = df_v3_6_get_fica, + .set_fica = df_v3_6_set_fica }; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c 2019-08-31 15:01:11.847736167 -0500 @@ -357,7 +357,7 @@ void gfxhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = 
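/*
 * This gfxhub hunk and the gfxhub_v2_0/gfx_v10_0 ones that follow are
 * mechanical fallout from the vmhub renumbering earlier in the patch: with
 * AMDGPU_MAX_VMHUBS now 3, evidently to make room for ASICs carrying a
 * second MMHUB instance, code must name an explicit hub (AMDGPU_GFXHUB_0,
 * AMDGPU_MMHUB_0 or the new AMDGPU_MMHUB_1), and per-hub operations loop
 * over adev->num_vmhubs, as in the gfx_v10_0_init_pg hunk later in this
 * patch:
 *
 *   for (i = 0; i < adev->num_vmhubs; i++)
 *           amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 */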
&adev->vmhub[AMDGPU_GFXHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c 2019-08-31 15:01:11.847736167 -0500 @@ -140,7 +140,7 @@ /* XXX for emulation, Refer to closed source code.*/ tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); - tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); tmp = REG_SET_FIELD(tmp, GCVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL, tmp); @@ -333,7 +333,7 @@ void gfxhub_v2_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(GC, 0, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 2019-08-31 15:01:12.277736205 -0500 @@ -20,8 +20,12 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ + +#include +#include #include -#include +#include +#include #include "amdgpu.h" #include "amdgpu_gfx.h" #include "amdgpu_psp.h" @@ -56,6 +60,9 @@ #define F32_CE_PROGRAM_RAM_SIZE 65536 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define mmCGTT_GS_NGG_CLK_CTRL 0x5087 +#define mmCGTT_GS_NGG_CLK_CTRL_BASE_IDX 1 + MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -63,6 +70,20 @@ MODULE_FIRMWARE("amdgpu/navi10_mec2.bin"); MODULE_FIRMWARE("amdgpu/navi10_rlc.bin"); +MODULE_FIRMWARE("amdgpu/navi14_ce.bin"); +MODULE_FIRMWARE("amdgpu/navi14_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navi14_me.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec.bin"); +MODULE_FIRMWARE("amdgpu/navi14_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navi14_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/navi12_ce.bin"); +MODULE_FIRMWARE("amdgpu/navi12_pfp.bin"); +MODULE_FIRMWARE("amdgpu/navi12_me.bin"); +MODULE_FIRMWARE("amdgpu/navi12_mec.bin"); +MODULE_FIRMWARE("amdgpu/navi12_mec2.bin"); +MODULE_FIRMWARE("amdgpu/navi12_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -109,6 +130,99 @@ /* Pending on emulation bring up */ }; +static const struct soc15_reg_golden golden_settings_gc_10_1_1[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x003c0014), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xc0000000, 0xc0000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xf8ff0fff, 0x60000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0x40000ff0, 0x40000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 
0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000002, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x00000200, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04900000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x000007ff, 0x000001fe), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffe7), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0x60000010, 0x479c0010), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000), +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_2[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0x003e001f, 0x003c0014), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_GS_NGG_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_IA_CLK_CTRL, 0xffff0fff, 0xffff0100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CLK_CTRL, 0xff7f0fff, 0xc0000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQ_CLK_CTRL, 0xffffcfff, 0x60000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SQG_CLK_CTRL, 0xffff0fff, 0x40000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_VGT_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_WD_CLK_CTRL, 0xffff8fff, 0xffff8100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_VC5_ENABLE, 0x00000003, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0x800007ff, 0x000005ff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG, 0xffffffff, 0x20000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xffffffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x04800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DFSM_TILES_IN_FLIGHT, 0x0000ffff, 0x0000003f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_LAST_OF_BURST_CONFIG, 0xffffffff, 0x03860204), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff0ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PRIV_CONTROL, 0x00007fff, 0x000001fe), + SOC15_REG_GOLDEN_VALUE(GC, 0, 
mmGL1_PIPE_STEER, 0xffffffff, 0xe4e4e4e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x10321032), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x02310231), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_0, 0xffffffff, 0x842a4c02), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04440000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000820, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CNTL, 0xffdf80ff, 0x479c0010), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00800000) +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_nv14[] = +{ + /* Pending on emulation bring up */ +}; + +static const struct soc15_reg_golden golden_settings_gc_10_1_2_nv12[] = +{ + /* Pending on emulation bring up */ +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -250,6 +364,22 @@ golden_settings_gc_10_0_nv10, (const u32)ARRAY_SIZE(golden_settings_gc_10_0_nv10)); break; + case CHIP_NAVI14: + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_1, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_1)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_nv14, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_nv14)); + break; + case CHIP_NAVI12: + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_2, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2)); + soc15_program_register_sequence(adev, + golden_settings_gc_10_1_2_nv12, + (const u32)ARRAY_SIZE(golden_settings_gc_10_1_2_nv12)); + break; default: break; } @@ -331,7 +461,7 @@ if (amdgpu_emu_mode == 1) msleep(1); else - DRM_UDELAY(1); + udelay(1); } if (i < adev->usec_timeout) { if (amdgpu_emu_mode == 1) @@ -481,6 +611,12 @@ case CHIP_NAVI10: chip_name = "navi10"; break; + case CHIP_NAVI14: + chip_name = "navi14"; + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; default: BUG(); } @@ -1026,6 +1162,8 @@ switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1133,6 +1271,8 @@ switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 2; adev->gfx.me.num_queue_per_pipe = 1; @@ -1452,6 +1592,25 @@ } } +static void gfx_v10_0_init_gds_vmid(struct 
amdgpu_device *adev)
+{
+	int vmid;
+
+	/*
+	 * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
+	 * access. Compute VMIDs should be enabled by FW for target VMIDs,
+	 * the driver can enable them for graphics. VMID0 should maintain
+	 * access so that HWS firmware can save/restore entries.
+	 */
+	for (vmid = 1; vmid < 16; vmid++) {
+		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
+		WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
+		WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
+		WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
+	}
+}
+
+
 static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
 {
 	int i, j, k;
@@ -1461,7 +1620,8 @@
 	u32 utcl_invreq_disable = 0;
 	/*
 	 * GCRD_TARGETS_DISABLE field contains
-	 * for Navi10: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
+	 * for Navi10/Navi12: GL1C=[18:15], SQC=[14:10], TCP=[9:0]
+	 * for Navi14: GL1C=[21:18], SQC=[17:12], TCP=[11:0]
 	 */
 	u32 gcrd_targets_disable_mask = amdgpu_gfx_create_bitmask(
 		2 * max_wgp_per_sh + /* TCP */
@@ -1469,7 +1629,8 @@
 		4); /* GL1C */
 	/*
 	 * UTCL1_UTCL0_INVREQ_DISABLE field contains
-	 * for Navi10: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
+	 * for Navi10/Navi12: SQG=[24], RMI=[23:20], SQC=[19:10], TCP=[9:0]
+	 * for Navi14: SQG=[28], RMI=[27:24], SQC=[23:12], TCP=[11:0]
 	 */
 	u32 utcl_invreq_disable_mask = amdgpu_gfx_create_bitmask(
 		2 * max_wgp_per_sh + /* TCP */
@@ -1477,7 +1638,9 @@
 		4 + /* RMI */
 		1); /* SQG */
 
-	if (adev->asic_type == CHIP_NAVI10) {
+	if (adev->asic_type == CHIP_NAVI10 ||
+	    adev->asic_type == CHIP_NAVI14 ||
+	    adev->asic_type == CHIP_NAVI12) {
 		mutex_lock(&adev->grbm_idx_mutex);
 		for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 			for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
@@ -1535,7 +1698,7 @@
 	/* XXX SH_MEM regs */
 	/* where to put LDS, scratch, GPUVM in FSA64 space */
 	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) {
+	for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
 		nv_grbm_select(adev, 0, 0, 0, i);
 		/* CP and shaders */
 		WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, DEFAULT_SH_MEM_CONFIG);
@@ -1552,6 +1715,7 @@
 	mutex_unlock(&adev->srbm_mutex);
 
 	gfx_v10_0_init_compute_vmid(adev);
+	gfx_v10_0_init_gds_vmid(adev);
 }
 
@@ -1584,9 +1748,12 @@
 static void gfx_v10_0_init_pg(struct amdgpu_device *adev)
 {
+	int i;
+
 	gfx_v10_0_init_csb(adev);
 
-	amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
+	for (i = 0; i < adev->num_vmhubs; i++)
+		amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
 
 	/* TODO: init power gating */
 	return;
@@ -1624,9 +1791,9 @@
 	 * hence no handshake between SMU & RLC
 	 * GFXOFF will be disabled
 	 */
-		rlc_pg_cntl |= 0x80000;
+		rlc_pg_cntl |= 0x800000;
 	} else
-		rlc_pg_cntl &= ~0x80000;
+		rlc_pg_cntl &= ~0x800000;
 
 	WREG32_SOC15(GC, 0, mmRLC_PG_CNTL, rlc_pg_cntl);
 }
@@ -3614,20 +3781,12 @@
 static int gfx_v10_0_suspend(void *handle)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	adev->in_suspend = true;
-	return gfx_v10_0_hw_fini(adev);
+	return gfx_v10_0_hw_fini(handle);
 }
 
 static int gfx_v10_0_resume(void *handle)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int r;
-
-	r = gfx_v10_0_hw_init(adev);
-	adev->in_suspend = false;
-	return r;
+	return gfx_v10_0_hw_init(handle);
 }
 
 static bool gfx_v10_0_is_idle(void *handle)
@@ -4037,6 +4196,7 @@
 	bool enable = (state == AMD_PG_STATE_GATE) ?
true : false; switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: if (!enable) { amdgpu_gfx_off_ctrl(adev, false); cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); @@ -4056,6 +4216,8 @@ switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: gfx_v10_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; @@ -4453,7 +4615,7 @@ if (ring->trail_seq == le32_to_cpu(*(ring->trail_fence_cpu_addr))) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) { @@ -4927,7 +5089,7 @@ .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_gfx, .get_wptr = gfx_v10_0_ring_get_wptr_gfx, .set_wptr = gfx_v10_0_ring_set_wptr_gfx, @@ -4978,7 +5140,7 @@ .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_compute, .get_wptr = gfx_v10_0_ring_get_wptr_compute, .set_wptr = gfx_v10_0_ring_set_wptr_compute, @@ -5011,7 +5173,7 @@ .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v10_0_ring_get_rptr_compute, .get_wptr = gfx_v10_0_ring_get_wptr_compute, .set_wptr = gfx_v10_0_ring_set_wptr_compute, @@ -5088,6 +5250,8 @@ { switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; default: diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 2019-08-31 15:01:11.847736167 -0500 @@ -1890,6 +1890,24 @@ } } +static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. + */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); + WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); + } +} + static void gfx_v7_0_config_init(struct amdgpu_device *adev) { adev->gfx.config.double_offchip_lds_buf = 1; @@ -1968,6 +1986,7 @@ mutex_unlock(&adev->srbm_mutex); gfx_v7_0_init_compute_vmid(adev); + gfx_v7_0_init_gds_vmid(adev); WREG32(mmSX_DEBUG_1, 0x20); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 2019-08-31 15:01:12.274736205 -0500 @@ -3750,6 +3750,24 @@ } } +static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. 
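 * (Judging by the "2 * vmid" offsets in the gfx_v10_0 variant earlier in
 * this patch, the per-VMID BASE and SIZE registers interleave there, so
 * consecutive VMIDs sit two dwords apart; the CIK/VI paths here reach the
 * equivalent registers through the amdgpu_gds_reg_offset[] table instead,
 * while GWS and OA use one register per VMID in both layouts.)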
+ */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); + WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); + WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); + WREG32(amdgpu_gds_reg_offset[vmid].oa, 0); + } +} + static void gfx_v8_0_config_init(struct amdgpu_device *adev) { switch (adev->asic_type) { @@ -3816,6 +3834,7 @@ mutex_unlock(&adev->srbm_mutex); gfx_v8_0_init_compute_vmid(adev); + gfx_v8_0_init_gds_vmid(adev); mutex_lock(&adev->grbm_idx_mutex); /* diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 2019-08-31 15:09:16.619778999 -0500 @@ -36,10 +36,10 @@ #include "gc/gc_9_0_offset.h" #include "gc/gc_9_0_sh_mask.h" + #include "vega10_enum.h" #include "hdp/hdp_4_0_offset.h" -#include "soc15.h" #include "soc15_common.h" #include "clearstate_gfx9.h" #include "v9_structs.h" @@ -60,6 +60,9 @@ #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L +#define mmGCEA_PROBE_MAP 0x070c +#define mmGCEA_PROBE_MAP_BASE_IDX 0 + MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); MODULE_FIRMWARE("amdgpu/vega10_me.bin"); @@ -104,6 +107,397 @@ MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_mec.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/renoir_ce.bin"); +MODULE_FIRMWARE("amdgpu/renoir_pfp.bin"); +MODULE_FIRMWARE("amdgpu/renoir_me.bin"); +MODULE_FIRMWARE("amdgpu/renoir_mec.bin"); +MODULE_FIRMWARE("amdgpu/renoir_mec2.bin"); +MODULE_FIRMWARE("amdgpu/renoir_rlc.bin"); + +#define mmTCP_CHAN_STEER_0_ARCT 0x0b03 +#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_1_ARCT 0x0b04 +#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_2_ARCT 0x0b09 +#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_3_ARCT 0x0b0a +#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_4_ARCT 0x0b0b +#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX 0 +#define mmTCP_CHAN_STEER_5_ARCT 0x0b0c +#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 + +enum ta_ras_gfx_subblock { + /*CPC*/ + TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, + TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START, + TA_RAS_BLOCK__GFX_CPC_UCODE, + TA_RAS_BLOCK__GFX_DC_STATE_ME1, + TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1, + TA_RAS_BLOCK__GFX_DC_RESTORE_ME1, + TA_RAS_BLOCK__GFX_DC_STATE_ME2, + TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2, + TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, + TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2, + /* CPF*/ + TA_RAS_BLOCK__GFX_CPF_INDEX_START, + TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START, + TA_RAS_BLOCK__GFX_CPF_ROQ_ME1, + TA_RAS_BLOCK__GFX_CPF_TAG, + TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG, + /* CPG*/ + TA_RAS_BLOCK__GFX_CPG_INDEX_START, + TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START, + TA_RAS_BLOCK__GFX_CPG_DMA_TAG, + TA_RAS_BLOCK__GFX_CPG_TAG, + TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG, + /* GDS*/ + TA_RAS_BLOCK__GFX_GDS_INDEX_START, + TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START, + TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, + TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, + 
TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, + TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, + /* SPI*/ + TA_RAS_BLOCK__GFX_SPI_SR_MEM, + /* SQ*/ + TA_RAS_BLOCK__GFX_SQ_INDEX_START, + TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START, + TA_RAS_BLOCK__GFX_SQ_LDS_D, + TA_RAS_BLOCK__GFX_SQ_LDS_I, + TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/ + TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR, + /* SQC (3 ranges)*/ + TA_RAS_BLOCK__GFX_SQC_INDEX_START, + /* SQC range 0*/ + TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START, + TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = + TA_RAS_BLOCK__GFX_SQC_INDEX0_START, + TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, + TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, + TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, + TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + TA_RAS_BLOCK__GFX_SQC_INDEX0_END = + TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, + /* SQC range 1*/ + TA_RAS_BLOCK__GFX_SQC_INDEX1_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = + TA_RAS_BLOCK__GFX_SQC_INDEX1_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_INDEX1_END = + TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, + /* SQC range 2*/ + TA_RAS_BLOCK__GFX_SQC_INDEX2_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = + TA_RAS_BLOCK__GFX_SQC_INDEX2_START, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_INDEX2_END = + TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, + TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END, + /* TA*/ + TA_RAS_BLOCK__GFX_TA_INDEX_START, + TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START, + TA_RAS_BLOCK__GFX_TA_FS_AFIFO, + TA_RAS_BLOCK__GFX_TA_FL_LFIFO, + TA_RAS_BLOCK__GFX_TA_FX_LFIFO, + TA_RAS_BLOCK__GFX_TA_FS_CFIFO, + TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO, + /* TCA*/ + TA_RAS_BLOCK__GFX_TCA_INDEX_START, + TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START, + TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, + TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO, + /* TCC (5 sub-ranges)*/ + TA_RAS_BLOCK__GFX_TCC_INDEX_START, + /* TCC range 0*/ + TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, + TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, + TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, + TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, + TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, + TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, + /* TCC range 1*/ + TA_RAS_BLOCK__GFX_TCC_INDEX1_START, + 
TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START, + TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + TA_RAS_BLOCK__GFX_TCC_INDEX1_END = + TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, + /* TCC range 2*/ + TA_RAS_BLOCK__GFX_TCC_INDEX2_START, + TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START, + TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, + TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, + TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN, + TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, + TA_RAS_BLOCK__GFX_TCC_SRC_FIFO, + TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, + TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + TA_RAS_BLOCK__GFX_TCC_INDEX2_END = + TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, + /* TCC range 3*/ + TA_RAS_BLOCK__GFX_TCC_INDEX3_START, + TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START, + TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + TA_RAS_BLOCK__GFX_TCC_INDEX3_END = + TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, + /* TCC range 4*/ + TA_RAS_BLOCK__GFX_TCC_INDEX4_START, + TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = + TA_RAS_BLOCK__GFX_TCC_INDEX4_START, + TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + TA_RAS_BLOCK__GFX_TCC_INDEX4_END = + TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, + TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END, + /* TCI*/ + TA_RAS_BLOCK__GFX_TCI_WRITE_RAM, + /* TCP*/ + TA_RAS_BLOCK__GFX_TCP_INDEX_START, + TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START, + TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM, + TA_RAS_BLOCK__GFX_TCP_CMD_FIFO, + TA_RAS_BLOCK__GFX_TCP_VM_FIFO, + TA_RAS_BLOCK__GFX_TCP_DB_RAM, + TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, + TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, + /* TD*/ + TA_RAS_BLOCK__GFX_TD_INDEX_START, + TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START, + TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI, + TA_RAS_BLOCK__GFX_TD_CS_FIFO, + TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO, + /* EA (3 sub-ranges)*/ + TA_RAS_BLOCK__GFX_EA_INDEX_START, + /* EA range 0*/ + TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START, + TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START, + TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, + TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, + TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM, + TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM, + TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, + TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, + TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, + /* EA range 1*/ + TA_RAS_BLOCK__GFX_EA_INDEX1_START, + TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START, + TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, + TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM, + TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, + TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, + TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, + TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, + /* EA range 2*/ + TA_RAS_BLOCK__GFX_EA_INDEX2_START, + TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START, + TA_RAS_BLOCK__GFX_EA_MAM_D1MEM, + TA_RAS_BLOCK__GFX_EA_MAM_D2MEM, + TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, + TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM, + TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END, + /* UTC VM L2 bank*/ + TA_RAS_BLOCK__UTC_VML2_BANK_CACHE, + /* UTC VM walker*/ + TA_RAS_BLOCK__UTC_VML2_WALKER, + /* UTC ATC L2 2MB cache*/ + TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, + /* UTC ATC L2 4KB cache*/ + 
TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, + TA_RAS_BLOCK__GFX_MAX +}; + +struct ras_gfx_subblock { + unsigned char *name; + int ta_subblock; + int hw_supported_error_type; + int sw_supported_error_type; +}; + +#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h) \ + [AMDGPU_RAS_BLOCK__##subblock] = { \ + #subblock, \ + TA_RAS_BLOCK__##subblock, \ + ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)), \ + (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)), \ + } + +static const struct ras_gfx_subblock ras_gfx_subblocks[] = { + AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, + 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0, + 0, 0, 0), 
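The AMDGPU_RAS_SUB_BLOCK() macro defined above packs its eight flag arguments into the two error-type bitmasks; note that the software mask is built in a deliberately non-sequential bit order, ((e) << 1) | ((f) << 3) | (g) | ((h) << 2). A stand-alone sketch of the same packing (user-space C, helper names invented for illustration), fed the GFX_CPC_SCRATCH arguments (0, 1, 1, 1, 1, 0, 0, 1) from the table:

#include <stdio.h>

/* Mirrors AMDGPU_RAS_SUB_BLOCK: a..d become hw-mask bits 0..3; e..h land
 * in sw-mask bits 1, 3, 0 and 2 respectively. */
static unsigned int pack_hw(int a, int b, int c, int d)
{
        return a | (b << 1) | (c << 2) | (d << 3);
}

static unsigned int pack_sw(int e, int f, int g, int h)
{
        return (e << 1) | (f << 3) | g | (h << 2);
}

int main(void)
{
        /* GFX_CPC_SCRATCH: (0, 1, 1, 1, 1, 0, 0, 1) */
        printf("hw=0x%x sw=0x%x\n", pack_hw(0, 1, 1, 1), pack_sw(1, 0, 0, 1));
        return 0;       /* prints hw=0xe sw=0x6 */
}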
+ AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0, + 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0, + 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0, + 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), + 
AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0), + AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0), +}; + static const struct soc15_reg_golden golden_settings_gc_9_0[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), @@ -227,6 +621,22 @@ SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), }; +static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc), +}; + static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff), @@ -271,6 +681,18 @@ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) }; +static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1), + 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135), +}; + static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = { mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0, @@ -310,19 +732,21 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); +static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status); +static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, + void *inject_if); static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - if (!amdgpu_virt_support_skip_setting(adev)) { - soc15_program_register_sequence(adev, - golden_settings_gc_9_0, - ARRAY_SIZE(golden_settings_gc_9_0)); - soc15_program_register_sequence(adev, - golden_settings_gc_9_0_vg10, - ARRAY_SIZE(golden_settings_gc_9_0_vg10)); - } + soc15_program_register_sequence(adev, + golden_settings_gc_9_0, + ARRAY_SIZE(golden_settings_gc_9_0)); + soc15_program_register_sequence(adev, + golden_settings_gc_9_0_vg10, + ARRAY_SIZE(golden_settings_gc_9_0_vg10)); break; case CHIP_VEGA12: soc15_program_register_sequence(adev, @@ -340,6 +764,11 @@ golden_settings_gc_9_0_vg20, ARRAY_SIZE(golden_settings_gc_9_0_vg20)); break; + case CHIP_ARCTURUS: + soc15_program_register_sequence(adev, + golden_settings_gc_9_4_1_arct, + ARRAY_SIZE(golden_settings_gc_9_4_1_arct)); + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_gc_9_1, ARRAY_SIZE(golden_settings_gc_9_1)); @@ -352,12 +781,18 @@ golden_settings_gc_9_1_rv1, ARRAY_SIZE(golden_settings_gc_9_1_rv1)); break; + case CHIP_RENOIR: + soc15_program_register_sequence(adev, + golden_settings_gc_9_1_rn, + ARRAY_SIZE(golden_settings_gc_9_1_rn)); + return; /* Renoir does not need the common golden settings */ default: break; } - soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, - (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); + if (adev->asic_type != CHIP_ARCTURUS) + soc15_program_register_sequence(adev, golden_settings_gc_9_x_common, + (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common)); } static void gfx_v9_0_scratch_init(struct amdgpu_device *adev) @@ -596,14 +1031,14 @@ case CHIP_VEGA20: break; case CHIP_RAVEN: - if (adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) - break; - if ((adev->gfx.rlc_fw_version != 106 && - adev->gfx.rlc_fw_version < 531) || - (adev->gfx.rlc_fw_version == 53815) || - (adev->gfx.rlc_feature_version < 1) || - !adev->gfx.rlc.is_rlc_v2_1) + if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8) + && ((adev->gfx.rlc_fw_version != 106 && + adev->gfx.rlc_fw_version < 531) || + (adev->gfx.rlc_fw_version == 53815) || + (adev->gfx.rlc_feature_version < 1) || + !adev->gfx.rlc.is_rlc_v2_1)) adev->pm.pp_feature &= ~PP_GFXOFF_MASK; + if (adev->pm.pp_feature & PP_GFXOFF_MASK) adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_CP | @@ -614,44 +1049,14 @@ } } -static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) +static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, + const char *chip_name) { - const char *chip_name; char fw_name[30]; int err; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr; - const struct rlc_firmware_header_v2_0 *rlc_hdr; - unsigned int *tmp = NULL; - unsigned int i = 0; - 
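Each SOC15_REG_GOLDEN_VALUE() entry in the tables above is a (register, AND mask, OR value) triple that soc15_program_register_sequence() applies as a read-modify-write over the current register contents. A rough stand-alone model of that semantics, assuming the usual clear-then-set behavior (the real helper lives in soc15.c and may differ in detail):

/* Hypothetical model of applying one golden-register entry: clear the
 * bits selected by and_mask, then OR in the new value for those bits. */
struct golden_entry {
        unsigned int reg;       /* register offset */
        unsigned int and_mask;  /* bits this entry may touch */
        unsigned int or_value;  /* value for the touched bits */
};

static unsigned int apply_golden(unsigned int old, const struct golden_entry *e)
{
        return (old & ~e->and_mask) | (e->or_value & e->and_mask);
}

Under this reading, the Renoir-only early return above simply skips the extra pass over golden_settings_gc_9_x_common.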
uint16_t version_major; - uint16_t version_minor; - uint32_t smu_version; - - DRM_DEBUG("\n"); - - switch (adev->asic_type) { - case CHIP_VEGA10: - chip_name = "vega10"; - break; - case CHIP_VEGA12: - chip_name = "vega12"; - break; - case CHIP_VEGA20: - chip_name = "vega20"; - break; - case CHIP_RAVEN: - if (adev->rev_id >= 8) - chip_name = "raven2"; - else if (adev->pdev->device == 0x15d8) - chip_name = "picasso"; - else - chip_name = "raven"; - break; - default: - BUG(); - } snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name); err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev); @@ -686,6 +1091,58 @@ adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; + info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; + info->fw = adev->gfx.pfp_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; + info->ucode_id = AMDGPU_UCODE_ID_CP_ME; + info->fw = adev->gfx.me_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; + info->ucode_id = AMDGPU_UCODE_ID_CP_CE; + info->fw = adev->gfx.ce_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } + +out: + if (err) { + dev_err(adev->dev, + "gfx9: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->gfx.pfp_fw); + adev->gfx.pfp_fw = NULL; + release_firmware(adev->gfx.me_fw); + adev->gfx.me_fw = NULL; + release_firmware(adev->gfx.ce_fw); + adev->gfx.ce_fw = NULL; + } + return err; +} + +static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, + const char *chip_name) +{ + char fw_name[30]; + int err; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + unsigned int *tmp = NULL; + unsigned int i = 0; + uint16_t version_major; + uint16_t version_minor; + uint32_t smu_version; + /* * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin * instead of picasso_rlc.bin. 
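The PFP/ME/CE loading just shown repeats one idiom per firmware image: build the "amdgpu/<chip>_<block>.bin" name, fetch it with request_firmware(), validate it, then pull version fields out of the header. Distilled into a single hypothetical helper (a sketch, not part of the patch):

static int load_one_fw(struct amdgpu_device *adev, const char *chip,
                       const char *block, const struct firmware **fw)
{
        char fw_name[30];
        int err;

        snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_%s.bin", chip, block);
        err = request_firmware(fw, fw_name, adev->dev);
        if (err)
                return err;
        /* amdgpu_ucode_validate() sanity-checks the image against its header */
        return amdgpu_ucode_validate(*fw);
}

The error paths matter as much as the happy path here: each new helper releases only the firmware it requested, which is what makes the split into per-component loaders safe.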
@@ -760,57 +1217,7 @@ if (adev->gfx.rlc.is_rlc_v2_1) gfx_v9_0_init_rlc_ext_microcode(adev); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); - err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->gfx.mec_fw); - if (err) - goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - - - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); - err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); - if (!err) { - err = amdgpu_ucode_validate(adev->gfx.mec2_fw); - if (err) - goto out; - cp_hdr = (const struct gfx_firmware_header_v1_0 *) - adev->gfx.mec2_fw->data; - adev->gfx.mec2_fw_version = - le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.mec2_feature_version = - le32_to_cpu(cp_hdr->ucode_feature_version); - } else { - err = 0; - adev->gfx.mec2_fw = NULL; - } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; - info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; - info->ucode_id = AMDGPU_UCODE_ID_CP_ME; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; - info->ucode_id = AMDGPU_UCODE_ID_CP_CE; - info->fw = adev->gfx.ce_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; info->ucode_id = AMDGPU_UCODE_ID_RLC_G; info->fw = adev->gfx.rlc_fw; @@ -840,7 +1247,58 @@ adev->firmware.fw_size += ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE); } + } +out: + if (err) { + dev_err(adev->dev, + "gfx9: Failed to load firmware \"%s\"\n", + fw_name); + release_firmware(adev->gfx.rlc_fw); + adev->gfx.rlc_fw = NULL; + } + return err; +} + +static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, + const char *chip_name) +{ + char fw_name[30]; + int err; + struct amdgpu_firmware_info *info = NULL; + const struct common_firmware_header *header = NULL; + const struct gfx_firmware_header_v1_0 *cp_hdr; + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); + err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); + if (err) + goto out; + err = amdgpu_ucode_validate(adev->gfx.mec_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); + err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); + if (!err) { + err = amdgpu_ucode_validate(adev->gfx.mec2_fw); + if (err) + goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw_version = + le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec2_feature_version = 
+ le32_to_cpu(cp_hdr->ucode_feature_version); + } else { + err = 0; + adev->gfx.mec2_fw = NULL; + } + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; info->fw = adev->gfx.mec_fw; @@ -863,13 +1321,18 @@ cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data; adev->firmware.fw_size += ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; - info->fw = adev->gfx.mec2_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE); - } + /* TODO: Determine if MEC2 JT FW loading can be removed + for all GFX V9 asic and above */ + if (adev->asic_type != CHIP_ARCTURUS) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT; + info->fw = adev->gfx.mec2_fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, + PAGE_SIZE); + } + } } out: @@ -879,14 +1342,6 @@ dev_err(adev->dev, "gfx9: Failed to load firmware \"%s\"\n", fw_name); - release_firmware(adev->gfx.pfp_fw); - adev->gfx.pfp_fw = NULL; - release_firmware(adev->gfx.me_fw); - adev->gfx.me_fw = NULL; - release_firmware(adev->gfx.ce_fw); - adev->gfx.ce_fw = NULL; - release_firmware(adev->gfx.rlc_fw); - adev->gfx.rlc_fw = NULL; release_firmware(adev->gfx.mec_fw); adev->gfx.mec_fw = NULL; release_firmware(adev->gfx.mec2_fw); @@ -895,6 +1350,59 @@ return err; } +static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) +{ + const char *chip_name; + int r; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_VEGA10: + chip_name = "vega10"; + break; + case CHIP_VEGA12: + chip_name = "vega12"; + break; + case CHIP_VEGA20: + chip_name = "vega20"; + break; + case CHIP_RAVEN: + if (adev->rev_id >= 8) + chip_name = "raven2"; + else if (adev->pdev->device == 0x15d8) + chip_name = "picasso"; + else + chip_name = "raven"; + break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; + case CHIP_RENOIR: + chip_name = "renoir"; + break; + default: + BUG(); + } + + /* No CPG in Arcturus */ + if (adev->asic_type != CHIP_ARCTURUS) { + r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name); + if (r) + return r; + } + + r = gfx_v9_0_init_rlc_microcode(adev, chip_name); + if (r) + return r; + + r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name); + if (r) + return r; + + return r; +} + static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) { u32 count = 0; @@ -1132,7 +1640,7 @@ return r; } - if (adev->asic_type == CHIP_RAVEN) { + if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { /* TODO: double check the cp_table_size for RV */ adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ r = amdgpu_gfx_rlc_init_cpt(adev); @@ -1142,6 +1650,7 @@ switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: gfx_v9_0_init_lbpw(adev); break; case CHIP_VEGA20: @@ -1328,7 +1837,9 @@ .read_wave_data = &gfx_v9_0_read_wave_data, .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs, .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs, - .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q + .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q, + .ras_error_inject = &gfx_v9_0_ras_error_inject, + .query_ras_error_count = &gfx_v9_0_query_ras_error_count }; static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) @@ -1381,6 +1892,26 @@ else gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; 
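After this refactor, gfx_v9_0_init_microcode() above is only a dispatcher: it resolves chip_name, then calls the CP-GFX, RLC and CP-compute loaders in turn, skipping the CP-GFX step entirely on Arcturus, which has no graphics pipeline (no CPG). The shape of the pattern, reduced to a sketch with invented helper names:

/* Sketch only: each loader owns its own request/validate/cleanup, so
 * skipping one component cannot leak another component's firmware. */
static int init_all_microcode(struct amdgpu_device *adev, const char *chip)
{
        int r;

        if (adev->asic_type != CHIP_ARCTURUS) {  /* no CPG on Arcturus */
                r = load_cp_gfx(adev, chip);     /* hypothetical helper */
                if (r)
                        return r;
        }

        r = load_rlc(adev, chip);                /* hypothetical helper */
        if (r)
                return r;

        return load_cp_compute(adev, chip);      /* hypothetical helper */
}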
break; + case CHIP_ARCTURUS: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); + gb_addr_config &= ~0xf3e777ff; + gb_addr_config |= 0x22014042; + break; + case CHIP_RENOIR: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x80; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG); + gb_addr_config &= ~0xf3e777ff; + gb_addr_config |= 0x22010042; + break; default: BUG(); break; @@ -1657,6 +2188,8 @@ case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: adev->gfx.mec.num_mec = 2; break; default: @@ -1814,7 +2347,7 @@ gfx_v9_0_mec_fini(adev); gfx_v9_0_ngg_fini(adev); amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj); - if (adev->asic_type == CHIP_RAVEN) { + if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) { amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, &adev->gfx.rlc.cp_table_gpu_addr, (void **)&adev->gfx.rlc.cp_table_ptr); @@ -1933,6 +2466,24 @@ } } +static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) +{ + int vmid; + + /* + * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA + * access. Compute VMIDs should be enabled by FW for target VMIDs, + * the driver can enable them for graphics. VMID0 should maintain + * access so that HWS firmware can save/restore entries. + */ + for (vmid = 1; vmid < 16; vmid++) { + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); + WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0); + } +} + static void gfx_v9_0_constants_init(struct amdgpu_device *adev) { u32 tmp; @@ -1949,7 +2500,7 @@ /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { soc15_grbm_select(adev, 0, 0, 0, i); /* CP and shaders */ if (i == 0) { @@ -1977,6 +2528,7 @@ mutex_unlock(&adev->srbm_mutex); gfx_v9_0_init_compute_vmid(adev); + gfx_v9_0_init_gds_vmid(adev); } static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) @@ -2474,6 +3026,7 @@ switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: if (amdgpu_lbpw == 0) gfx_v9_0_enable_lbpw(adev, false); else @@ -2853,6 +3406,10 @@ mqd->compute_static_thread_mgmt_se1 = 0xffffffff; mqd->compute_static_thread_mgmt_se2 = 0xffffffff; mqd->compute_static_thread_mgmt_se3 = 0xffffffff; + mqd->compute_static_thread_mgmt_se4 = 0xffffffff; + mqd->compute_static_thread_mgmt_se5 = 0xffffffff; + mqd->compute_static_thread_mgmt_se6 = 0xffffffff; + mqd->compute_static_thread_mgmt_se7 = 0xffffffff; mqd->compute_misc_reserved = 0x00000003; mqd->dynamic_cu_mask_addr_lo = @@ -3256,10 +3813,12 @@ gfx_v9_0_enable_gui_idle_interrupt(adev, false); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - /* legacy firmware loading */ - r = gfx_v9_0_cp_gfx_load_microcode(adev); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + /* legacy firmware loading */ + r = 
gfx_v9_0_cp_gfx_load_microcode(adev); + if (r) + return r; + } r = gfx_v9_0_cp_compute_load_microcode(adev); if (r) @@ -3270,18 +3829,22 @@ if (r) return r; - r = gfx_v9_0_cp_gfx_resume(adev); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + r = gfx_v9_0_cp_gfx_resume(adev); + if (r) + return r; + } r = gfx_v9_0_kcq_resume(adev); if (r) return r; - ring = &adev->gfx.gfx_ring[0]; - r = amdgpu_ring_test_helper(ring); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + ring = &adev->gfx.gfx_ring[0]; + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -3295,7 +3858,8 @@ static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable) { - gfx_v9_0_cp_gfx_enable(adev, enable); + if (adev->asic_type != CHIP_ARCTURUS) + gfx_v9_0_cp_gfx_enable(adev, enable); gfx_v9_0_cp_compute_enable(adev, enable); } @@ -3304,7 +3868,8 @@ int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfx_v9_0_init_golden_registers(adev); + if (!amdgpu_sriov_vf(adev)) + gfx_v9_0_init_golden_registers(adev); gfx_v9_0_constants_init(adev); @@ -3320,9 +3885,11 @@ if (r) return r; - r = gfx_v9_0_ngg_en(adev); - if (r) - return r; + if (adev->asic_type != CHIP_ARCTURUS) { + r = gfx_v9_0_ngg_en(adev); + if (r) + return r; + } return r; } @@ -3470,8 +4037,9 @@ /* stop the rlc */ adev->gfx.rlc.funcs->stop(adev); - /* Disable GFX parsing/prefetching */ - gfx_v9_0_cp_gfx_enable(adev, false); + if (adev->asic_type != CHIP_ARCTURUS) + /* Disable GFX parsing/prefetching */ + gfx_v9_0_cp_gfx_enable(adev, false); /* Disable MEC parsing/prefetching */ gfx_v9_0_cp_compute_enable(adev, false); @@ -3814,7 +4382,10 @@ { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; + if (adev->asic_type == CHIP_ARCTURUS) + adev->gfx.num_gfx_rings = 0; + else + adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); @@ -3825,6 +4396,7 @@ } static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, + struct ras_err_data *err_data, struct amdgpu_iv_entry *entry); static int gfx_v9_0_ecc_late_init(void *handle) @@ -3990,6 +4562,9 @@ { amdgpu_gfx_rlc_enter_safe_mode(adev); + if (is_support_sw_smu(adev) && !enable) + smu_set_gfx_cgpg(&adev->smu, enable); + if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { gfx_v9_0_enable_gfx_cg_power_gating(adev, true); if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) @@ -4101,6 +4676,9 @@ { uint32_t data, def; + if (adev->asic_type == CHIP_ARCTURUS) + return; + amdgpu_gfx_rlc_enter_safe_mode(adev); /* Enable 3D CGCG/CGLS */ @@ -4166,8 +4744,12 @@ /* enable cgcg FSM(0x0000363F) */ def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL); - data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | - RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + if (adev->asic_type == CHIP_ARCTURUS) + data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; + else + data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) | + RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK; if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK; @@ -4239,6 +4821,7 @@ switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: if (!enable) { amdgpu_gfx_off_ctrl(adev, false); 
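Arcturus support threads the same guard through many call sites: with adev->gfx.num_gfx_rings set to 0, every CP-GFX path (microcode load, cp_gfx_resume, the gfx ring test, NGG enablement, 3D CGCG) is skipped via an explicit asic_type check. If the list of gfx-less ASICs ever grows, a single predicate would keep the guards in one place; a possible sketch, not in the patch:

/* Hypothetical helper: true when the ASIC has a graphics pipeline. */
static bool gfx_v9_0_has_gfx_pipe(struct amdgpu_device *adev)
{
        return adev->asic_type != CHIP_ARCTURUS;
}

Call sites would then read if (gfx_v9_0_has_gfx_pipe(adev)) instead of repeating the CHIP_ARCTURUS comparison.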
cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); @@ -4257,6 +4840,8 @@ gfx_v9_0_enable_cp_power_gating(adev, false); /* update gfx cgpg state */ + if (is_support_sw_smu(adev) && enable) + smu_set_gfx_cgpg(&adev->smu, enable); gfx_v9_0_update_gfx_cg_power_gating(adev, enable); /* update mgcg state */ @@ -4293,6 +4878,8 @@ case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: gfx_v9_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; @@ -4334,14 +4921,16 @@ if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS; - /* AMD_CG_SUPPORT_GFX_3D_CGCG */ - data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); - if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) - *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; - - /* AMD_CG_SUPPORT_GFX_3D_CGLS */ - if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) - *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; + if (adev->asic_type != CHIP_ARCTURUS) { + /* AMD_CG_SUPPORT_GFX_3D_CGCG */ + data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D); + if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG; + + /* AMD_CG_SUPPORT_GFX_3D_CGLS */ + if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK) + *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS; + } } static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) @@ -5137,12 +5726,423 @@ } static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev, + struct ras_err_data *err_data, struct amdgpu_iv_entry *entry) { /* TODO ue will trigger an interrupt. */ kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); + if (adev->gfx.funcs->query_ras_error_count) + adev->gfx.funcs->query_ras_error_count(adev, err_data); amdgpu_ras_reset_gpu(adev, 0); - return AMDGPU_RAS_UE; + return AMDGPU_RAS_SUCCESS; +} + +static const struct { + const char *name; + uint32_t ip; + uint32_t inst; + uint32_t seg; + uint32_t reg_offset; + uint32_t per_se_instance; + int32_t num_instance; + uint32_t sec_count_mask; + uint32_t ded_count_mask; +} gfx_ras_edc_regs[] = { + { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, + REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT), + REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) }, + { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, + REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT), + REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) }, + { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, + REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 }, + { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, + REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 }, + { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, + REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT), + REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) }, + { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, + REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 }, + { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, + REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT), + REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) }, + { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, + REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT), + REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) }, + { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, + REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 }, + { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, + REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 }, + { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 
+ REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 }, + { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) }, + { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 }, + { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), + 0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) }, + { "GDS_OA_PHY_PHY_CMD_RAM_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) }, + { "GDS_OA_PHY_PHY_DATA_RAM_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 }, + { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) }, + { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) }, + { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) }, + { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM", + SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC), + REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) }, + { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1, + REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 }, + { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT), + REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) }, + { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 }, + { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 }, + { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 }, + { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16, + REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 }, + { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, + REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 }, + { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2, + REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 }, + { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) }, + { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) }, + { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) }, + { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) }, + { "TCC_SRC_FIFO", 
SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT), + REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) }, + { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 }, + { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 }, + { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 }, + { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 }, + { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 }, + { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 }, + { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 }, + { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16, + REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 }, + { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, + 16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 }, + { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT), + 0 }, + { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, + 16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 }, + { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), + 0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT), + 0 }, + { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, + 16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 }, + { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72, + REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 }, + { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT), + REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) }, + { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT), + REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) }, + { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 }, + { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 }, + { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 }, + { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT), + REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) }, + { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16, + REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT), + REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) }, + { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, + REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT), + REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) }, + { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, + REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT), + REG_FIELD_MASK(TD_EDC_CNT, 
SS_FIFO_HI_DED_COUNT) }, + { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16, + REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 }, + { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) }, + { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) }, + { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) }, + { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) }, + { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) }, + { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) }, + { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16, + REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT), + REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) }, + { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), + 1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) }, + { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) }, + { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) }, + { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) }, + { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) }, + { "SQC_INST_BANKA_UTCL1_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT), + 0 }, + { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, 
REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKA_DIRTY_BIT_RAM", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 }, + { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) }, + { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) }, + { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) }, + { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) }, + { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT), + REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) }, + { "SQC_INST_BANKB_UTCL1_MISS_FIFO", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT), + 0 }, + { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, + 6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 }, + { "SQC_DATA_BANKB_DIRTY_BIT_RAM", + SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6, + REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 }, + { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) }, + { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) }, + { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) }, + { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) }, + { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) }, + { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 }, + { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 }, + { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + 
REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 }, + { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 }, + { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 }, + { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) }, + { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) }, + { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT), + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) }, + { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 }, + { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 }, + { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 }, + { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 }, + { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 }, + { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32, + REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 }, +}; + +static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, + void *inject_if) +{ + struct ras_inject_if *info = (struct ras_inject_if *)inject_if; + int ret; + struct ta_ras_trigger_error_input block_info = { 0 }; + + if (adev->asic_type != CHIP_VEGA20) + return -EINVAL; + + if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks)) + return -EINVAL; + + if (!ras_gfx_subblocks[info->head.sub_block_index].name) + return -EPERM; + + if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type & + info->head.type)) { + DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n", + ras_gfx_subblocks[info->head.sub_block_index].name, + info->head.type); + return -EPERM; + } + + if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type & + info->head.type)) { + DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n", + ras_gfx_subblocks[info->head.sub_block_index].name, + info->head.type); + return -EPERM; + } + + block_info.block_id = amdgpu_ras_block_to_ta(info->head.block); + block_info.sub_block_index = + ras_gfx_subblocks[info->head.sub_block_index].ta_subblock; + block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type); + block_info.address = info->address; + block_info.value = info->value; + + mutex_lock(&adev->grbm_idx_mutex); + ret = psp_ras_trigger_error(&adev->psp, &block_info); + mutex_unlock(&adev->grbm_idx_mutex); + + return ret; +} + +static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + uint32_t sec_count, ded_count; + uint32_t i; + uint32_t reg_value; + uint32_t se_id, instance_id; + + if (adev->asic_type != CHIP_VEGA20) + return -EINVAL; + + err_data->ue_count = 0; + err_data->ce_count = 0; + + mutex_lock(&adev->grbm_idx_mutex); + for (se_id = 0; se_id < 
adev->gfx.config.max_shader_engines; se_id++) { + for (instance_id = 0; instance_id < 256; instance_id++) { + for (i = 0; + i < sizeof(gfx_ras_edc_regs) / sizeof(gfx_ras_edc_regs[0]); + i++) { + if (se_id != 0 && + !gfx_ras_edc_regs[i].per_se_instance) + continue; + if (instance_id >= gfx_ras_edc_regs[i].num_instance) + continue; + + gfx_v9_0_select_se_sh(adev, se_id, 0, + instance_id); + + reg_value = RREG32( + adev->reg_offset[gfx_ras_edc_regs[i].ip] + [gfx_ras_edc_regs[i].inst] + [gfx_ras_edc_regs[i].seg] + + gfx_ras_edc_regs[i].reg_offset); + sec_count = reg_value & + gfx_ras_edc_regs[i].sec_count_mask; + ded_count = reg_value & + gfx_ras_edc_regs[i].ded_count_mask; + if (sec_count) { + DRM_INFO( + "Instance[%d][%d]: SubBlock %s, SEC %d\n", + se_id, instance_id, + gfx_ras_edc_regs[i].name, + sec_count); + err_data->ce_count++; + } + + if (ded_count) { + DRM_INFO( + "Instance[%d][%d]: SubBlock %s, DED %d\n", + se_id, instance_id, + gfx_ras_edc_regs[i].name, + ded_count); + err_data->ue_count++; + } + } + } + } + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; } static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev, @@ -5187,7 +6187,7 @@ .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_gfx, .get_wptr = gfx_v9_0_ring_get_wptr_gfx, .set_wptr = gfx_v9_0_ring_set_wptr_gfx, @@ -5238,7 +6238,7 @@ .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -5273,7 +6273,7 @@ .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -5353,6 +6353,8 @@ case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; break; default: @@ -5370,6 +6372,7 @@ adev->gds.gds_size = 0x10000; break; case CHIP_RAVEN: + case CHIP_ARCTURUS: adev->gds.gds_size = 0x1000; break; default: @@ -5391,6 +6394,9 @@ else adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */ break; + case CHIP_ARCTURUS: + adev->gds.gds_compute_max_wave_id = 0xfff; + break; default: /* this really depends on the chip */ adev->gds.gds_compute_max_wave_id = 0x7ff; @@ -5435,12 +6441,21 @@ { int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; - unsigned disable_masks[4 * 2]; + unsigned disable_masks[4 * 4]; if (!adev || !cu_info) return -EINVAL; - amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); + /* + * 16 comes from bitmap array size 4*4, and it can cover all gfx9 ASICs + */ + if (adev->gfx.config.max_shader_engines * + adev->gfx.config.max_sh_per_se > 16) + return -EINVAL; + + amdgpu_gfx_parse_disable_cu(disable_masks, + adev->gfx.config.max_shader_engines, + adev->gfx.config.max_sh_per_se); mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { @@ -5449,11 +6464,23 @@ ao_bitmap = 0; counter = 0; gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); - if (i < 4 && j < 2) - gfx_v9_0_set_user_cu_inactive_bitmap( - adev, disable_masks[i * 2 + j]); + 
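One detail worth noting in gfx_v9_0_query_ras_error_count() above: sec_count and ded_count are taken as reg_value & mask without shifting the field down, so the values logged by DRM_INFO are the raw in-place register bits (scaled by the field offset whenever the field does not start at bit 0), and err_data->ce_count/ue_count are bumped by one per non-zero field rather than by the field value. A conventional mask-and-shift extraction, for comparison (stand-alone sketch; the kernel's REG_GET_FIELD() does the equivalent with explicit _MASK/__SHIFT constants):

#include <stdint.h>

/* Shift the masked field down by the mask's lowest set bit.
 * mask must be non-zero (ded_count_mask is 0 for SED-only entries). */
static uint32_t field_get(uint32_t reg_value, uint32_t mask)
{
        return (reg_value & mask) >> __builtin_ctz(mask);
}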
gfx_v9_0_set_user_cu_inactive_bitmap( + adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); bitmap = gfx_v9_0_get_cu_active_bitmap(adev); - cu_info->bitmap[i][j] = bitmap; + + /* + * The bitmap(and ao_cu_bitmap) in cu_info structure is + * 4x4 size array, and it's usually suitable for Vega + * ASICs which has 4*2 SE/SH layout. + * But for Arcturus, SE/SH layout is changed to 8*1. + * To mostly reduce the impact, we make it compatible + * with current bitmap array as below: + * SE4,SH0 --> bitmap[0][1] + * SE5,SH0 --> bitmap[1][1] + * SE6,SH0 --> bitmap[2][1] + * SE7,SH0 --> bitmap[3][1] + */ + cu_info->bitmap[i % 4][j + i / 4] = bitmap; for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { @@ -5466,7 +6493,7 @@ active_cu_number += counter; if (i < 2 && j < 2) ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); - cu_info->ao_cu_bitmap[i][j] = ao_bitmap; + cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } } gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 2019-08-31 15:01:11.848736167 -0500 @@ -62,7 +62,7 @@ struct amdgpu_vmhub *hub; u32 tmp, reg, bits[AMDGPU_MAX_VMHUBS], i; - bits[AMDGPU_GFXHUB] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + bits[AMDGPU_GFXHUB_0] = GCVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | @@ -70,7 +70,7 @@ GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK; - bits[AMDGPU_MMHUB] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | + bits[AMDGPU_MMHUB_0] = MMVM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | MMVM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | MMVM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | MMVM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK | @@ -81,39 +81,39 @@ switch (state) { case AMDGPU_IRQ_STATE_DISABLE: /* MM HUB */ - hub = &adev->vmhub[AMDGPU_MMHUB]; + hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); - tmp &= ~bits[AMDGPU_MMHUB]; + tmp &= ~bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); } /* GFX HUB */ - hub = &adev->vmhub[AMDGPU_GFXHUB]; + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); - tmp &= ~bits[AMDGPU_GFXHUB]; + tmp &= ~bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); } break; case AMDGPU_IRQ_STATE_ENABLE: /* MM HUB */ - hub = &adev->vmhub[AMDGPU_MMHUB]; + hub = &adev->vmhub[AMDGPU_MMHUB_0]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); - tmp |= bits[AMDGPU_MMHUB]; + tmp |= bits[AMDGPU_MMHUB_0]; WREG32(reg, tmp); } /* GFX HUB */ - hub = &adev->vmhub[AMDGPU_GFXHUB]; + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; tmp = RREG32(reg); - tmp |= bits[AMDGPU_GFXHUB]; + tmp |= bits[AMDGPU_GFXHUB_0]; WREG32(reg, tmp); } break; @@ -136,22 +136,53 @@ addr |= ((u64)entry->src_data[1] & 0xf) << 44; if 
(!amdgpu_sriov_vf(adev)) { + /* + * Issue a dummy read to wait for the status register to + * be updated to avoid reading an incorrect value due to + * the new fast GRBM interface. + */ + if (entry->vmid_src == AMDGPU_GFXHUB_0) + RREG32(hub->vm_l2_pro_fault_status); + status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); } if (printk_ratelimit()) { + struct amdgpu_task_info task_info; + + memset(&task_info, 0, sizeof(struct amdgpu_task_info)); + amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + dev_err(adev->dev, - "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n", + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " + "for process %s pid %d thread %s pid %d)\n", entry->vmid_src ? "mmhub" : "gfxhub", entry->src_id, entry->ring_id, entry->vmid, - entry->pasid); - dev_err(adev->dev, " at page 0x%016llx from %d\n", + entry->pasid, task_info.process_name, task_info.tgid, + task_info.task_name, task_info.pid); + dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", addr, entry->client_id); - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev)) { dev_err(adev->dev, - "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", + "GCVM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%lx\n", + REG_GET_FIELD(status, + GCVM_L2_PROTECTION_FAULT_STATUS, RW)); + } } return 0; @@ -206,6 +237,13 @@ WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + /* + * Issue a dummy read to wait for the ACK register to be cleared + * to avoid a false ACK due to the new fast GRBM interface. + */ + if (vmhub == AMDGPU_GFXHUB_0) + RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + /* Wait for ACK with a delay.*/ for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); @@ -230,8 +268,8 @@ * * Flush the TLB for the requested page table. 
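
The per-field fault-status decode added above is plain mask-and-shift, which is essentially what REG_GET_FIELD() expands to. A standalone sketch, with placeholder masks and shifts rather than the real GCVM_L2_PROTECTION_FAULT_STATUS layout:

#include <stdio.h>
#include <stdint.h>

/* models REG_GET_FIELD(): isolate a named field and shift it down */
static uint32_t get_field(uint32_t reg, uint32_t mask, unsigned shift)
{
	return (reg & mask) >> shift;
}

int main(void)
{
	uint32_t status = 0x00000131;	/* hypothetical fault status value */

	printf("\t MORE_FAULTS: 0x%x\n",
	       (unsigned)get_field(status, 0x1, 0));
	printf("\t WALKER_ERROR: 0x%x\n",
	       (unsigned)get_field(status, 0xe, 1));
	printf("\t PERMISSION_FAULTS: 0x%x\n",
	       (unsigned)get_field(status, 0xf0, 4));
	return 0;
}
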
*/ -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct dma_fence *fence; @@ -244,11 +282,18 @@ mutex_lock(&adev->mman.gtt_window_lock); - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB, 0); + if (vmhub == AMDGPU_MMHUB_0) { + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0); + mutex_unlock(&adev->mman.gtt_window_lock); + return; + } + + BUG_ON(vmhub != AMDGPU_GFXHUB_0); + if (!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready || adev->in_gpu_reset) { - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB, 0); + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB_0, 0); mutex_unlock(&adev->mman.gtt_window_lock); return; } @@ -313,7 +358,7 @@ struct amdgpu_device *adev = ring->adev; uint32_t reg; - if (ring->funcs->vmhub == AMDGPU_GFXHUB) + if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -524,6 +569,8 @@ if (amdgpu_gart_size == -1) { switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: default: adev->gmc.gart_size = 512ULL << 20; break; @@ -590,7 +637,6 @@ static int gmc_v10_0_sw_init(void *handle) { int r; - int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfxhub_v2_0_init(adev); @@ -601,9 +647,12 @@ adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + adev->num_vmhubs = 2; /* * To fulfill 4-level page support, - * vm size is 256TB (48bit), maximum size of Navi10, + * vm size is 256TB (48bit), maximum size of Navi10/Navi14/Navi12, * block size 512 (9bit) */ amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); @@ -637,26 +686,10 @@ else adev->gmc.stolen_size = 9 * 1024 *1024; - /* - * Set DMA mask + need_dma32 flags. - * PCIE - can handle 44-bits. - * IGP - can handle 44-bits - * PCI - dma32 for legacy pci gart, 44 bits on navi10 - */ - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 
32 : 44; - - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); - } - - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); + return r; } r = gmc_v10_0_mc_init(adev); @@ -680,8 +713,8 @@ * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; amdgpu_vm_manager_init(adev); @@ -717,6 +750,8 @@ { switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: break; default: break; @@ -766,7 +801,8 @@ gfxhub_v2_0_set_fault_enable_default(adev, value); mmhub_v2_0_set_fault_enable_default(adev, value); - gmc_v10_0_flush_gpu_tlb(adev, 0, 0); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c 2019-08-31 15:01:11.848736167 -0500 @@ -362,8 +362,8 @@ return 0; } -static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); } @@ -571,7 +571,7 @@ else gmc_v6_0_set_fault_enable_default(adev, true); - gmc_v6_0_flush_gpu_tlb(adev, 0, 0); + gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0); dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); @@ -839,9 +839,10 @@ static int gmc_v6_0_sw_init(void *handle) { int r; - int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->num_vmhubs = 1; + if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { @@ -862,20 +863,12 @@ adev->gmc.mc_mask = 0xffffffffffULL; - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 
32 : 40; - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; dev_warn(adev->dev, "amdgpu: No suitable DMA available.\n"); + return r; } - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - dev_warn(adev->dev, "amdgpu: No coherent DMA available.\n"); - } - adev->need_swiotlb = drm_need_swiotlb(dma_bits); + adev->need_swiotlb = drm_need_swiotlb(44); r = gmc_v6_0_init_microcode(adev); if (r) { diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c 2019-08-31 15:01:11.848736167 -0500 @@ -433,8 +433,8 @@ * * Flush the TLB for the requested page table (CIK). */ -static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -677,7 +677,7 @@ WREG32(mmCHUB_CONTROL, tmp); } - gmc_v7_0_flush_gpu_tlb(adev, 0, 0); + gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); @@ -959,9 +959,10 @@ static int gmc_v7_0_sw_init(void *handle) { int r; - int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->num_vmhubs = 1; + if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { @@ -990,25 +991,12 @@ */ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - /* set DMA mask + need_dma32 flags. - * PCIE - can handle 40-bits. - * IGP - can handle 40-bits - * PCI - dma32 for legacy pci gart, 40 bits on newer asics - */ - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 32 : 40; - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; pr_warn("amdgpu: No suitable DMA available\n"); + return r; } - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - pr_warn("amdgpu: No coherent DMA available\n"); - } - adev->need_swiotlb = drm_need_swiotlb(dma_bits); + adev->need_swiotlb = drm_need_swiotlb(40); r = gmc_v7_0_init_microcode(adev); if (r) { diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c 2019-08-31 15:01:11.848736167 -0500 @@ -635,8 +635,8 @@ * * Flush the TLB for the requested page table (VI). 
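
The DMA-mask rework repeated across these gmc blocks replaces the pci_set_dma_mask()/pci_set_consistent_dma_mask() pair and its 32-bit fallback with a single dma_set_mask_and_coherent() call whose failure is now fatal for driver init. A userspace model of the new flow (the stub below just emulates a device that accepts any mask up to 44 bits; the real kernel API takes adev->dev as its first argument):

#include <stdio.h>
#include <stdint.h>

/* same definition the kernel uses; the ternary avoids 1ULL << 64 */
#define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

static int dma_set_mask_and_coherent_stub(uint64_t mask)
{
	return mask <= DMA_BIT_MASK(44) ? 0 : -5;	/* -EIO */
}

int main(void)
{
	/* one call now sets both the streaming and the coherent mask */
	if (dma_set_mask_and_coherent_stub(DMA_BIT_MASK(44))) {
		fprintf(stderr, "amdgpu: No suitable DMA available.\n");
		return 1;
	}
	printf("44-bit streaming + coherent DMA mask set\n");
	return 0;
}
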
*/ -static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -921,7 +921,7 @@ else gmc_v8_0_set_fault_enable_default(adev, true); - gmc_v8_0_flush_gpu_tlb(adev, 0, 0); + gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); @@ -1079,9 +1079,10 @@ static int gmc_v8_0_sw_init(void *handle) { int r; - int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->num_vmhubs = 1; + if (adev->flags & AMD_IS_APU) { adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN; } else { @@ -1116,25 +1117,12 @@ */ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ - /* set DMA mask + need_dma32 flags. - * PCIE - can handle 40-bits. - * IGP - can handle 40-bits - * PCI - dma32 for legacy pci gart, 40 bits on newer asics - */ - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 32 : 40; - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; pr_warn("amdgpu: No suitable DMA available\n"); + return r; } - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - pr_warn("amdgpu: No coherent DMA available\n"); - } - adev->need_swiotlb = drm_need_swiotlb(dma_bits); + adev->need_swiotlb = drm_need_swiotlb(40); r = gmc_v8_0_init_microcode(adev); if (r) { diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 2019-08-31 15:01:11.848736167 -0500 @@ -47,7 +47,10 @@ #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" +#include "athub_v1_0.h" #include "gfxhub_v1_1.h" +#include "mmhub_v9_4.h" +#include "umc_v6_1.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" @@ -241,18 +244,30 @@ } static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev, + struct ras_err_data *err_data, struct amdgpu_iv_entry *entry) { kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - amdgpu_ras_reset_gpu(adev, 0); - return AMDGPU_RAS_UE; + if (adev->umc.funcs->query_ras_error_count) + adev->umc.funcs->query_ras_error_count(adev, err_data); + /* umc query_ras_error_address is also responsible for clearing + * error status + */ + if (adev->umc.funcs->query_ras_error_address) + adev->umc.funcs->query_ras_error_address(adev, err_data); + + /* only uncorrectable error needs gpu reset */ + if (err_data->ue_count) + amdgpu_ras_reset_gpu(adev, 0); + + return AMDGPU_RAS_SUCCESS; } static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - struct ras_common_if *ras_if = adev->gmc.ras_if; + struct ras_common_if *ras_if = adev->gmc.umc_ras_if; struct ras_dispatch_if ih_data = { .entry = entry, }; @@ -284,7 +299,7 @@ switch (state) { case AMDGPU_IRQ_STATE_DISABLE: - for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { + for (j = 0; j < adev->num_vmhubs; j++) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -295,7 +310,7 @@ } break; case AMDGPU_IRQ_STATE_ENABLE: - 
for (j = 0; j < AMDGPU_MAX_VMHUBS; j++) { + for (j = 0; j < adev->num_vmhubs; j++) { hub = &adev->vmhub[j]; for (i = 0; i < 16; i++) { reg = hub->vm_context0_cntl + i; @@ -315,10 +330,11 @@ struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; + struct amdgpu_vmhub *hub; bool retry_fault = !!(entry->src_data[1] & 0x80); uint32_t status = 0; u64 addr; + char hub_name[10]; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; @@ -327,8 +343,27 @@ entry->timestamp)) return 1; /* This also prevents sending it to KFD */ + if (entry->client_id == SOC15_IH_CLIENTID_VMC) { + snprintf(hub_name, sizeof(hub_name), "mmhub0"); + hub = &adev->vmhub[AMDGPU_MMHUB_0]; + } else if (entry->client_id == SOC15_IH_CLIENTID_VMC1) { + snprintf(hub_name, sizeof(hub_name), "mmhub1"); + hub = &adev->vmhub[AMDGPU_MMHUB_1]; + } else { + snprintf(hub_name, sizeof(hub_name), "gfxhub0"); + hub = &adev->vmhub[AMDGPU_GFXHUB_0]; + } + /* If it's the first fault for this address, process it normally */ if (!amdgpu_sriov_vf(adev)) { + /* + * Issue a dummy read to wait for the status register to + * be updated to avoid reading an incorrect value due to + * the new fast GRBM interface. + */ + if (entry->vmid_src == AMDGPU_GFXHUB_0) + RREG32(hub->vm_l2_pro_fault_status); + status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); } @@ -342,17 +377,33 @@ dev_err(adev->dev, "[%s] %s page fault (src_id:%u ring:%u vmid:%u " "pasid:%u, for process %s pid %d thread %s pid %d)\n", - entry->vmid_src ? "mmhub" : "gfxhub", - retry_fault ? "retry" : "no-retry", + hub_name, retry_fault ? "retry" : "no-retry", entry->src_id, entry->ring_id, entry->vmid, entry->pasid, task_info.process_name, task_info.tgid, task_info.task_name, task_info.pid); - dev_err(adev->dev, " in page starting at address 0x%016llx from %d\n", + dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", addr, entry->client_id); - if (!amdgpu_sriov_vf(adev)) + if (!amdgpu_sriov_vf(adev)) { dev_err(adev->dev, "VM_L2_PROTECTION_FAULT_STATUS:0x%08X\n", status); + dev_err(adev->dev, "\t MORE_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, MORE_FAULTS)); + dev_err(adev->dev, "\t WALKER_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, WALKER_ERROR)); + dev_err(adev->dev, "\t PERMISSION_FAULTS: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, PERMISSION_FAULTS)); + dev_err(adev->dev, "\t MAPPING_ERROR: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, MAPPING_ERROR)); + dev_err(adev->dev, "\t RW: 0x%lx\n", + REG_GET_FIELD(status, + VM_L2_PROTECTION_FAULT_STATUS, RW)); + + } } return 0; @@ -413,44 +464,53 @@ * * Flush the TLB for the requested page table using certain type. 
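
The client-id routing added above picks which vmhub's fault registers to read and how to label the fault. A standalone sketch of that selection (the numeric client IDs below are illustrative, not the actual SOC15_IH_CLIENTID values):

#include <stdio.h>

enum { AMDGPU_GFXHUB_0, AMDGPU_MMHUB_0, AMDGPU_MMHUB_1 };
enum { CLIENTID_VMC = 18, CLIENTID_VMC1 = 38, CLIENTID_UTCL2 = 27 };

static int client_to_vmhub(int client_id, const char **name)
{
	if (client_id == CLIENTID_VMC) {		/* first MM hub */
		*name = "mmhub0";
		return AMDGPU_MMHUB_0;
	} else if (client_id == CLIENTID_VMC1) {	/* Arcturus 2nd MM hub */
		*name = "mmhub1";
		return AMDGPU_MMHUB_1;
	}
	*name = "gfxhub0";				/* UTCL2 and friends */
	return AMDGPU_GFXHUB_0;
}

int main(void)
{
	const char *name;
	int hub = client_to_vmhub(CLIENTID_VMC1, &name);

	printf("fault routed to %s (vmhub %d)\n", name, hub);
	return 0;
}
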
*/ -static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { const unsigned eng = 17; - unsigned i, j; + u32 j, tmp; + struct amdgpu_vmhub *hub; - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { - struct amdgpu_vmhub *hub = &adev->vmhub[i]; - u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); + BUG_ON(vmhub >= adev->num_vmhubs); - /* This is necessary for a HW workaround under SRIOV as well - * as GFXOFF under bare metal - */ - if (adev->gfx.kiq.ring.sched.ready && - (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && - !adev->in_gpu_reset) { - uint32_t req = hub->vm_inv_eng0_req + eng; - uint32_t ack = hub->vm_inv_eng0_ack + eng; - - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, - 1 << vmid); - continue; - } + hub = &adev->vmhub[vmhub]; + tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); - spin_lock(&adev->gmc.invalidate_lock); - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); - for (j = 0; j < adev->usec_timeout; j++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); - if (tmp & (1 << vmid)) - break; - udelay(1); - } - spin_unlock(&adev->gmc.invalidate_lock); - if (j < adev->usec_timeout) - continue; + /* This is necessary for a HW workaround under SRIOV as well + * as GFXOFF under bare metal + */ + if (adev->gfx.kiq.ring.sched.ready && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && + !adev->in_gpu_reset) { + uint32_t req = hub->vm_inv_eng0_req + eng; + uint32_t ack = hub->vm_inv_eng0_ack + eng; - DRM_ERROR("Timeout waiting for VM flush ACK!\n"); + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, + 1 << vmid); + return; } + + spin_lock(&adev->gmc.invalidate_lock); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + + /* + * Issue a dummy read to wait for the ACK register to be cleared + * to avoid a false ACK due to the new fast GRBM interface. + */ + if (vmhub == AMDGPU_GFXHUB_0) + RREG32_NO_KIQ(hub->vm_inv_eng0_req + eng); + + for (j = 0; j < adev->usec_timeout; j++) { + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + if (tmp & (1 << vmid)) + break; + udelay(1); + } + spin_unlock(&adev->gmc.invalidate_lock); + if (j < adev->usec_timeout) + return; + + DRM_ERROR("Timeout waiting for VM flush ACK!\n"); } static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, @@ -480,7 +540,11 @@ struct amdgpu_device *adev = ring->adev; uint32_t reg; - if (ring->funcs->vmhub == AMDGPU_GFXHUB) + /* Do nothing because there's no lut register for mmhub1. 
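
The rewritten flush above follows a write-request / dummy-read / poll-ACK sequence; the dummy read of the request register is the fast-GRBM workaround called out in the comment. A userspace model, with plain variables standing in for the two registers and the hardware assumed to ACK immediately:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	volatile uint32_t req_reg = 0, ack_reg = 0;
	unsigned vmid = 3, j, usec_timeout = 100000;

	req_reg = 1u << vmid;	/* WREG32_NO_KIQ(vm_inv_eng0_req + eng) */
	ack_reg = req_reg;	/* model: the hub ACKs instantly here */
	(void)req_reg;		/* dummy read before polling starts */

	for (j = 0; j < usec_timeout; j++)
		if (ack_reg & (1u << vmid))
			break;

	puts(j < usec_timeout ? "VM flush ACKed"
			      : "Timeout waiting for VM flush ACK!");
	return 0;
}
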
*/ + if (ring->funcs->vmhub == AMDGPU_MMHUB_1) + return; + + if (ring->funcs->vmhub == AMDGPU_GFXHUB_0) reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid; else reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid; @@ -597,12 +661,41 @@ adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; } +static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA20: + adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; + adev->umc.channel_inst_num = UMC_V6_1_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; + adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET; + adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; + adev->umc.funcs = &umc_v6_1_funcs; + break; + default: + break; + } +} + +static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_VEGA20: + adev->mmhub_funcs = &mmhub_v1_0_funcs; + break; + default: + break; + } +} + static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; gmc_v9_0_set_gmc_funcs(adev); gmc_v9_0_set_irq_funcs(adev); + gmc_v9_0_set_umc_funcs(adev); + gmc_v9_0_set_mmhub_funcs(adev); adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_end = @@ -629,6 +722,8 @@ switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: return true; case CHIP_VEGA12: case CHIP_VEGA20: @@ -641,7 +736,8 @@ { struct amdgpu_ring *ring; unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = - {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP}; + {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP, + GFXHUB_FREE_VM_INV_ENGS_BITMAP}; unsigned i; unsigned vmhub, inv_eng; @@ -666,29 +762,28 @@ return 0; } -static int gmc_v9_0_ecc_late_init(void *handle) +static int gmc_v9_0_ecc_ras_block_late_init(void *handle, + struct ras_fs_if *fs_info, struct ras_common_if *ras_block) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_common_if **ras_if = &adev->gmc.ras_if; + struct ras_common_if **ras_if = NULL; struct ras_ih_if ih_info = { .cb = gmc_v9_0_process_ras_data_cb, }; - struct ras_fs_if fs_info = { - .sysfs_name = "umc_err_count", - .debugfs_name = "umc_err_inject", - }; - struct ras_common_if ras_block = { - .block = AMDGPU_RAS_BLOCK__UMC, - .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, - .sub_block_index = 0, - .name = "umc", - }; int r; - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) { - amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); + if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) + ras_if = &adev->gmc.umc_ras_if; + else if (ras_block->block == AMDGPU_RAS_BLOCK__MMHUB) + ras_if = &adev->gmc.mmhub_ras_if; + else + BUG(); + + if (!amdgpu_ras_is_supported(adev, ras_block->block)) { + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); return 0; } + /* handle resume path. */ if (*ras_if) { /* resend ras TA enable cmd during resume. @@ -700,7 +795,7 @@ if (r == -EAGAIN) { /* request a gpu reset. will run again. */ amdgpu_ras_request_reset_on_boot(adev, - AMDGPU_RAS_BLOCK__UMC); + ras_block->block); return 0; } /* fail to enable ras, cleanup all. 
*/ @@ -714,41 +809,46 @@ if (!*ras_if) return -ENOMEM; - **ras_if = ras_block; + **ras_if = *ras_block; r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1); if (r) { if (r == -EAGAIN) { amdgpu_ras_request_reset_on_boot(adev, - AMDGPU_RAS_BLOCK__UMC); + ras_block->block); r = 0; } goto feature; } ih_info.head = **ras_if; - fs_info.head = **ras_if; + fs_info->head = **ras_if; - r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); - if (r) - goto interrupt; + if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) { + r = amdgpu_ras_interrupt_add_handler(adev, &ih_info); + if (r) + goto interrupt; + } - amdgpu_ras_debugfs_create(adev, &fs_info); + amdgpu_ras_debugfs_create(adev, fs_info); - r = amdgpu_ras_sysfs_create(adev, &fs_info); + r = amdgpu_ras_sysfs_create(adev, fs_info); if (r) goto sysfs; resume: - r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); - if (r) - goto irq; + if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) { + r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); + if (r) + goto irq; + } return 0; irq: amdgpu_ras_sysfs_remove(adev, *ras_if); sysfs: amdgpu_ras_debugfs_remove(adev, *ras_if); - amdgpu_ras_interrupt_remove_handler(adev, &ih_info); + if (ras_block->block == AMDGPU_RAS_BLOCK__UMC) + amdgpu_ras_interrupt_remove_handler(adev, &ih_info); interrupt: amdgpu_ras_feature_enable(adev, *ras_if, 0); feature: @@ -757,6 +857,40 @@ return r; } +static int gmc_v9_0_ecc_late_init(void *handle) +{ + int r; + + struct ras_fs_if umc_fs_info = { + .sysfs_name = "umc_err_count", + .debugfs_name = "umc_err_inject", + }; + struct ras_common_if umc_ras_block = { + .block = AMDGPU_RAS_BLOCK__UMC, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .sub_block_index = 0, + .name = "umc", + }; + struct ras_fs_if mmhub_fs_info = { + .sysfs_name = "mmhub_err_count", + .debugfs_name = "mmhub_err_inject", + }; + struct ras_common_if mmhub_ras_block = { + .block = AMDGPU_RAS_BLOCK__MMHUB, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .sub_block_index = 0, + .name = "mmhub", + }; + + r = gmc_v9_0_ecc_ras_block_late_init(handle, + &umc_fs_info, &umc_ras_block); + if (r) + return r; + + r = gmc_v9_0_ecc_ras_block_late_init(handle, + &mmhub_fs_info, &mmhub_ras_block); + return r; +} static int gmc_v9_0_late_init(void *handle) { @@ -806,14 +940,17 @@ struct amdgpu_gmc *mc) { u64 base = 0; - if (!amdgpu_sriov_vf(adev)) + + if (adev->asic_type == CHIP_ARCTURUS) + base = mmhub_v9_4_get_fb_location(adev); + else if (!amdgpu_sriov_vf(adev)) base = mmhub_v1_0_get_fb_location(adev); + /* add the xgmi offset of the physical node */ base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; amdgpu_gmc_vram_location(adev, mc, base); amdgpu_gmc_gart_location(adev, mc); - if (!amdgpu_sriov_vf(adev)) - amdgpu_gmc_agp_location(adev, mc); + amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); @@ -887,10 +1024,12 @@ case CHIP_VEGA10: /* all engines support GPUVM */ case CHIP_VEGA12: /* all engines support GPUVM */ case CHIP_VEGA20: + case CHIP_ARCTURUS: default: adev->gmc.gart_size = 512ULL << 20; break; case CHIP_RAVEN: /* DCE SG support */ + case CHIP_RENOIR: adev->gmc.gart_size = 1024ULL << 20; break; } @@ -923,7 +1062,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) { - u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); + u32 d1vga_control; unsigned size; /* @@ -933,6 +1072,7 @@ if (gmc_v9_0_keep_stolen_memory(adev)) return 9 * 1024 * 1024; + d1vga_control = RREG32_SOC15(DCE, 0, 
mmD1VGA_CONTROL); if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ } else { @@ -940,6 +1080,7 @@ switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: viewport = RREG32_SOC15(DCE, 0, mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION); size = (REG_GET_FIELD(viewport, HUBP0_DCSURF_PRI_VIEWPORT_DIMENSION, PRI_VIEWPORT_HEIGHT) * @@ -968,17 +1109,21 @@ static int gmc_v9_0_sw_init(void *handle) { int r; - int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfxhub_v1_0_init(adev); - mmhub_v1_0_init(adev); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_init(adev); + else + mmhub_v1_0_init(adev); spin_lock_init(&adev->gmc.invalidate_lock); adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev); switch (adev->asic_type) { case CHIP_RAVEN: + adev->num_vmhubs = 2; + if (adev->rev_id == 0x0 || adev->rev_id == 0x1) { amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); } else { @@ -991,6 +1136,10 @@ case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: + case CHIP_RENOIR: + adev->num_vmhubs = 2; + + /* * To fulfill 4-level page support, * vm size is 256TB (48bit), maximum size of Vega10, @@ -1002,6 +1151,12 @@ else amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); break; + case CHIP_ARCTURUS: + adev->num_vmhubs = 3; + + /* Keep the vm size same with Vega20 */ + amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); + break; default: break; } @@ -1012,6 +1167,13 @@ if (r) return r; + if (adev->asic_type == CHIP_ARCTURUS) { + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC1, VMC_1_0__SRCID__VM_FAULT, + &adev->gmc.vm_fault); + if (r) + return r; + } + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, &adev->gmc.vm_fault); @@ -1030,25 +1192,12 @@ */ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ - /* set DMA mask + need_dma32 flags. - * PCIE - can handle 44-bits. - * IGP - can handle 44-bits - * PCI - dma32 for legacy pci gart, 44 bits on vega10 - */ - adev->need_dma32 = false; - dma_bits = adev->need_dma32 ? 
32 : 44; - r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); if (r) { - adev->need_dma32 = true; - dma_bits = 32; printk(KERN_WARNING "amdgpu: No suitable DMA available.\n"); + return r; } - r = pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); - if (r) { - pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32)); - printk(KERN_WARNING "amdgpu: No coherent DMA available.\n"); - } - adev->need_swiotlb = drm_need_swiotlb(dma_bits); + adev->need_swiotlb = drm_need_swiotlb(44); if (adev->gmc.xgmi.supported) { r = gfxhub_v1_1_get_xgmi_info(adev); @@ -1077,8 +1226,9 @@ * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB_0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB_1].num_ids = AMDGPU_NUM_OF_VMIDS; amdgpu_vm_manager_init(adev); @@ -1088,28 +1238,40 @@ static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + void *stolen_vga_buf; if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && - adev->gmc.ras_if) { - struct ras_common_if *ras_if = adev->gmc.ras_if; + adev->gmc.umc_ras_if) { + struct ras_common_if *ras_if = adev->gmc.umc_ras_if; struct ras_ih_if ih_info = { .head = *ras_if, }; - /*remove fs first*/ + /* remove fs first */ amdgpu_ras_debugfs_remove(adev, ras_if); amdgpu_ras_sysfs_remove(adev, ras_if); - /*remove the IH*/ + /* remove the IH */ amdgpu_ras_interrupt_remove_handler(adev, &ih_info); amdgpu_ras_feature_enable(adev, ras_if, 0); kfree(ras_if); } + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) && + adev->gmc.mmhub_ras_if) { + struct ras_common_if *ras_if = adev->gmc.mmhub_ras_if; + + /* remove fs and disable ras feature */ + amdgpu_ras_debugfs_remove(adev, ras_if); + amdgpu_ras_sysfs_remove(adev, ras_if); + amdgpu_ras_feature_enable(adev, ras_if, 0); + kfree(ras_if); + } + amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); if (gmc_v9_0_keep_stolen_memory(adev)) - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, &stolen_vga_buf); amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); @@ -1123,7 +1285,7 @@ switch (adev->asic_type) { case CHIP_VEGA10: - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) break; /* fall through */ case CHIP_VEGA20: @@ -1137,6 +1299,7 @@ case CHIP_VEGA12: break; case CHIP_RAVEN: + /* TODO for renoir */ soc15_program_register_sequence(adev, golden_settings_athub_1_0_0, ARRAY_SIZE(golden_settings_athub_1_0_0)); @@ -1153,7 +1316,7 @@ */ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) { - int r; + int r, i; bool value; u32 tmp; @@ -1171,6 +1334,7 @@ switch (adev->asic_type) { case CHIP_RAVEN: + /* TODO for renoir */ mmhub_v1_0_update_power_gating(adev, true); break; default: @@ -1181,7 +1345,10 @@ if (r) return r; - r = mmhub_v1_0_gart_enable(adev); + if (adev->asic_type == CHIP_ARCTURUS) + r = mmhub_v9_4_gart_enable(adev); + else + r = mmhub_v1_0_gart_enable(adev); if (r) return r; @@ -1202,8 +1369,13 @@ value = true; gfxhub_v1_0_set_fault_enable_default(adev, value); - mmhub_v1_0_set_fault_enable_default(adev, value); - gmc_v9_0_flush_gpu_tlb(adev, 0, 0); + if (adev->asic_type == 
CHIP_ARCTURUS) + mmhub_v9_4_set_fault_enable_default(adev, value); + else + mmhub_v1_0_set_fault_enable_default(adev, value); + + for (i = 0; i < adev->num_vmhubs; ++i) + gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), @@ -1243,7 +1415,10 @@ static void gmc_v9_0_gart_disable(struct amdgpu_device *adev) { gfxhub_v1_0_gart_disable(adev); - mmhub_v1_0_gart_disable(adev); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_gart_disable(adev); + else + mmhub_v1_0_gart_disable(adev); amdgpu_gart_table_vram_unpin(adev); } @@ -1308,14 +1483,26 @@ { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - return mmhub_v1_0_set_clockgating(adev, state); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_set_clockgating(adev, state); + else + mmhub_v1_0_set_clockgating(adev, state); + + athub_v1_0_set_clockgating(adev, state); + + return 0; } static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - mmhub_v1_0_get_clockgating(adev, flags); + if (adev->asic_type == CHIP_ARCTURUS) + mmhub_v9_4_get_clockgating(adev, flags); + else + mmhub_v1_0_get_clockgating(adev, flags); + + athub_v1_0_get_clockgating(adev, flags); } static int gmc_v9_0_set_powergating_state(void *handle, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h 2019-08-31 15:01:11.848736167 -0500 @@ -37,4 +37,11 @@ extern const struct amd_ip_funcs gmc_v9_0_ip_funcs; extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block; +/* amdgpu_amdkfd*.c */ +void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t value); +void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t value); +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, + uint32_t vmid, uint64_t value); #endif diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/Makefile linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/Makefile --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/Makefile 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/Makefile 2019-08-31 15:01:11.838736166 -0500 @@ -54,7 +54,7 @@ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ - amdgpu_vm_sdma.o amdgpu_discovery.o + amdgpu_vm_sdma.o amdgpu_pmu.o amdgpu_discovery.o amdgpu_ras_eeprom.o smu_v11_0_i2c.o amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o @@ -66,7 +66,8 @@ amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ - vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o + vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ + arct_reg_init.o navi12_reg_init.o # add DF block amdgpu-y += \ @@ -77,9 +78,13 @@ amdgpu-y += \ gmc_v7_0.o \ gmc_v8_0.o \ - gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o \ + gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o mmhub_v9_4.o \ gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o +# add UMC block +amdgpu-y += \ + umc_v6_1.o + # add IH block amdgpu-y += \ amdgpu_irq.o \ @@ -95,7 +100,8 @@ amdgpu_psp.o \ psp_v3_1.o \ psp_v10_0.o \ 
- psp_v11_0.o + psp_v11_0.o \ + psp_v12_0.o # add SMC block amdgpu-y += \ @@ -144,10 +150,12 @@ amdgpu-y += \ amdgpu_vcn.o \ vcn_v1_0.o \ - vcn_v2_0.o + vcn_v2_0.o \ + vcn_v2_5.o # add ATHUB block amdgpu-y += \ + athub_v1_0.o \ athub_v2_0.o # add amdkfd interfaces @@ -162,6 +170,7 @@ amdgpu_amdkfd_gpuvm.o \ amdgpu_amdkfd_gfx_v8.o \ amdgpu_amdkfd_gfx_v9.o \ + amdgpu_amdkfd_arcturus.o \ amdgpu_amdkfd_gfx_v10.o ifneq ($(CONFIG_DRM_AMDGPU_CIK),) diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c 2019-08-31 15:01:11.848736167 -0500 @@ -21,13 +21,13 @@ * */ #include "amdgpu.h" +#include "amdgpu_ras.h" #include "mmhub_v1_0.h" #include "mmhub/mmhub_1_0_offset.h" #include "mmhub/mmhub_1_0_sh_mask.h" #include "mmhub/mmhub_1_0_default.h" -#include "athub/athub_1_0_offset.h" -#include "athub/athub_1_0_sh_mask.h" +#include "mmhub/mmhub_9_4_0_offset.h" #include "vega10_enum.h" #include "soc15_common.h" @@ -35,6 +35,9 @@ #define mmDAGB0_CNTL_MISC2_RV 0x008f #define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0 +#define EA_EDC_CNT_MASK 0x3 +#define EA_EDC_CNT_SHIFT 0x2 + u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) { u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE); @@ -111,7 +114,7 @@ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) return; /* Set default page address. */ @@ -159,7 +162,7 @@ { uint32_t tmp; - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) return; /* Setup L2 cache */ @@ -208,7 +211,7 @@ static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) { - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) return; WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, @@ -348,7 +351,7 @@ 0); WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); - if (!amdgpu_virt_support_skip_setting(adev)) { + if (!amdgpu_sriov_vf(adev)) { /* Setup L2 cache */ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); @@ -367,7 +370,7 @@ { u32 tmp; - if (amdgpu_virt_support_skip_setting(adev)) + if (amdgpu_sriov_vf(adev)) return; tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL); @@ -407,7 +410,7 @@ void mmhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, @@ -491,22 +494,6 @@ WREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2, data2); } -static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) -{ - uint32_t def, data; - - def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) - data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; - else - data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; - - if (def != data) - WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); -} - static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, bool enable) { @@ -523,23 +510,6 @@ WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data); } -static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, - bool enable) -{ - uint32_t def, data; - - def = data = 
RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && - (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) - data |= ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; - else - data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; - - if(def != data) - WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); -} - int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state) { @@ -551,14 +521,11 @@ case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_RENOIR: mmhub_v1_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); - athub_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); mmhub_v1_0_update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE ? true : false); - athub_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? true : false); break; default: break; @@ -569,18 +536,85 @@ void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) { - int data; + int data, data1; if (amdgpu_sriov_vf(adev)) *flags = 0; + data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); + + data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + /* AMD_CG_SUPPORT_MC_MGCG */ - data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); - if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) + if ((data & ATC_L2_MISC_CG__ENABLE_MASK) && + !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))) *flags |= AMD_CG_SUPPORT_MC_MGCG; /* AMD_CG_SUPPORT_MC_LS */ - data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) *flags |= AMD_CG_SUPPORT_MC_LS; } + +static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + int i; + uint32_t ea0_edc_cnt, ea0_edc_cnt2; + uint32_t ea1_edc_cnt, ea1_edc_cnt2; + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + /* EDC CNT will be cleared automatically after read */ + ea0_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT_VG20); + ea0_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA0_EDC_CNT2_VG20); + ea1_edc_cnt = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT_VG20); + ea1_edc_cnt2 = RREG32_SOC15(MMHUB, 0, mmMMEA1_EDC_CNT2_VG20); + + /* error count of each error type is recorded by 2 bits, + * ce and ue count in EDC_CNT + */ + for (i = 0; i < 5; i++) { + err_data->ce_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); + err_data->ce_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); + ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; + err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); + err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); + ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; + } + /* successive ue count in EDC_CNT */ + for (i = 0; i < 5; i++) { + err_data->ue_count += (ea0_edc_cnt & EA_EDC_CNT_MASK); + err_data->ue_count += (ea1_edc_cnt & EA_EDC_CNT_MASK); + ea0_edc_cnt >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt >>= EA_EDC_CNT_SHIFT; + } + + /* ce and ue count in EDC_CNT2 */ + for (i = 0; i < 3; i++) { + err_data->ce_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); + err_data->ce_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); + ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); + err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); + ea0_edc_cnt2 
>>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + } + /* successive ue count in EDC_CNT2 */ + for (i = 0; i < 6; i++) { + err_data->ue_count += (ea0_edc_cnt2 & EA_EDC_CNT_MASK); + err_data->ue_count += (ea1_edc_cnt2 & EA_EDC_CNT_MASK); + ea0_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + ea1_edc_cnt2 >>= EA_EDC_CNT_SHIFT; + } +} + +const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = { + .query_ras_error_count = mmhub_v1_0_query_ras_error_count, +}; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h 2019-08-31 15:01:11.849736167 -0500 @@ -23,6 +23,8 @@ #ifndef __MMHUB_V1_0_H__ #define __MMHUB_V1_0_H__ +extern const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs; + u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev); int mmhub_v1_0_gart_enable(struct amdgpu_device *adev); void mmhub_v1_0_gart_disable(struct amdgpu_device *adev); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c 2019-08-31 15:01:11.849736167 -0500 @@ -126,7 +126,7 @@ /* XXX for emulation, Refer to closed source code.*/ tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); - tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); + tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 0); tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL, tmp); @@ -324,7 +324,7 @@ void mmhub_v2_0_init(struct amdgpu_device *adev) { - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB]; + struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB_0]; hub->ctx0_ptb_addr_lo32 = SOC15_REG_OFFSET(MMHUB, 0, @@ -406,6 +406,8 @@ switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: mmhub_v2_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); mmhub_v2_0_update_medium_grain_light_sleep(adev, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c 2019-08-31 15:01:11.849736167 -0500 @@ -0,0 +1,642 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
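
The EDC_CNT walking in mmhub_v1_0_query_ras_error_count() above relies on each register packing a sequence of 2-bit saturating counters, stepped with EA_EDC_CNT_MASK (0x3) and EA_EDC_CNT_SHIFT (2). A single-register sketch of the first loop (the register value is a made-up example):

#include <stdio.h>
#include <stdint.h>

#define EA_EDC_CNT_MASK  0x3
#define EA_EDC_CNT_SHIFT 0x2

int main(void)
{
	uint32_t edc_cnt = 0x000001c9;	/* hypothetical MMEA0_EDC_CNT */
	unsigned ce = 0, ue = 0;
	int i;

	/* five alternating ce/ue pairs, as in the EDC_CNT loop above */
	for (i = 0; i < 5; i++) {
		ce += edc_cnt & EA_EDC_CNT_MASK;
		edc_cnt >>= EA_EDC_CNT_SHIFT;
		ue += edc_cnt & EA_EDC_CNT_MASK;
		edc_cnt >>= EA_EDC_CNT_SHIFT;
	}
	printf("ce=%u ue=%u\n", ce, ue);
	return 0;
}
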
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "mmhub_v9_4.h" + +#include "mmhub/mmhub_9_4_1_offset.h" +#include "mmhub/mmhub_9_4_1_sh_mask.h" +#include "mmhub/mmhub_9_4_1_default.h" +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "vega10_enum.h" + +#include "soc15_common.h" + +#define MMHUB_NUM_INSTANCES 2 +#define MMHUB_INSTANCE_REGISTER_OFFSET 0x3000 + +u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) +{ + /* The base should be the same between the 2 mmhubs on Arcturus. Read one here. */ + u64 base = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE); + u64 top = RREG32_SOC15(MMHUB, 0, mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP); + + base &= VMSHAREDVC0_MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; + base <<= 24; + + top &= VMSHAREDVC0_MC_VM_FB_LOCATION_TOP__FB_TOP_MASK; + top <<= 24; + + adev->gmc.fb_start = base; + adev->gmc.fb_end = top; + + return base; +} + +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, + uint32_t vmid, uint64_t value) +{ + /* two registers' distance between mmVML2VC0_VM_CONTEXT0_* and + * mmVML2VC0_VM_CONTEXT1_* + */ + int dist = mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + lower_32_bits(value)); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + dist * vmid + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + upper_32_bits(value)); + +} + +static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev, + int hubid) +{ + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v9_4_setup_vm_pt_regs(adev, hubid, 0, pt_base); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_start >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_start >> 44)); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_end >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->gmc.gart_end >> 44)); +} + +static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev, + int hubid) +{ + uint64_t value; + uint32_t tmp; + + /* Program the AGP BAR */ + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BASE, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_TOP, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.agp_end >> 24); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_AGP_BOT, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.agp_start >> 24); + + /* Program the system aperture low logical page number. 
*/ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_LOW_ADDR, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_SYSTEM_APERTURE_HIGH_ADDR, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(value >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDPF0_MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)(adev->dummy_page_addr >> 12)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, + (u32)((u64)adev->dummy_page_addr >> 44)); + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + + /* Setup TLB control */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_L1_TLB, 1); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. 
*/ + tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + ATC_EN, 1); + + WREG32_SOC15_OFFSET(MMHUB, 0, mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + + /* Setup L2 cache */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + ENABLE_L2_CACHE, 1); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + ENABLE_L2_FRAGMENT_PROCESSING, 1); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + L2_PDE0_CACHE_TAG_GENERATION_MODE, 0); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + PDE_FAULT_CLASSIFICATION, 0); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL, + IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2, + INVALIDATE_ALL_L1_TLBS, 1); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL2, + INVALIDATE_L2_CACHE, 1); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL2, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + tmp = mmVML2PF0_VM_L2_CNTL3_DEFAULT; + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + + tmp = mmVML2PF0_VM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4, + VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL4, + VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL4, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_enable_system_domain(struct amdgpu_device *adev, + int hubid) +{ + uint32_t tmp; + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); +} + +static void mmhub_v9_4_disable_identity_aperture(struct amdgpu_device *adev, + int hubid) +{ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0XFFFFFFFF); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0x0000000F); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET, 0); +} + +static void mmhub_v9_4_setup_vmid_config(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + int i; + + for (i = 0; i <= 14; i++) { + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, + hubid * 
MMHUB_INSTANCE_REGISTER_OFFSET + i); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + ENABLE_CONTEXT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + PAGE_TABLE_DEPTH, + adev->vm_manager.num_level); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT, + 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + PDE0_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + VALID_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + READ_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1); + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + PAGE_TABLE_BLOCK_SIZE, + adev->vm_manager.block_size - 9); + /* Send no-retry XNACK on fault to suppress VM fault storm. */ + tmp = REG_SET_FIELD(tmp, VML2VC0_VM_CONTEXT1_CNTL, + RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT1_CNTL, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i, + tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + i*2, + upper_32_bits(adev->vm_manager.max_pfn - 1)); + } +} + +static void mmhub_v9_4_program_invalidation(struct amdgpu_device *adev, + int hubid) +{ + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + hubid * MMHUB_INSTANCE_REGISTER_OFFSET + 2 * i, + 0x1f); + } +} + +int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + if (amdgpu_sriov_vf(adev)) { + /* + * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because + * they are VF copy registers and vbios post doesn't + * program them, so for SRIOV the driver needs to program them + */ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE, + i * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.vram_start >> 24); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP, + i * MMHUB_INSTANCE_REGISTER_OFFSET, + adev->gmc.vram_end >> 24); + } + + /* GART Enable. 
+
+int mmhub_v9_4_gart_enable(struct amdgpu_device *adev)
+{
+	int i;
+
+	for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+		if (amdgpu_sriov_vf(adev)) {
+			/*
+			 * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because
+			 * they are VF copy registers and the vbios post doesn't
+			 * program them; for SRIOV the driver needs to program them
+			 */
+			WREG32_SOC15_OFFSET(MMHUB, 0,
+				     mmVMSHAREDVC0_MC_VM_FB_LOCATION_BASE,
+				     i * MMHUB_INSTANCE_REGISTER_OFFSET,
+				     adev->gmc.vram_start >> 24);
+			WREG32_SOC15_OFFSET(MMHUB, 0,
+				     mmVMSHAREDVC0_MC_VM_FB_LOCATION_TOP,
+				     i * MMHUB_INSTANCE_REGISTER_OFFSET,
+				     adev->gmc.vram_end >> 24);
+		}
+
+		/* GART Enable. */
+		mmhub_v9_4_init_gart_aperture_regs(adev, i);
+		mmhub_v9_4_init_system_aperture_regs(adev, i);
+		mmhub_v9_4_init_tlb_regs(adev, i);
+		mmhub_v9_4_init_cache_regs(adev, i);
+
+		mmhub_v9_4_enable_system_domain(adev, i);
+		mmhub_v9_4_disable_identity_aperture(adev, i);
+		mmhub_v9_4_setup_vmid_config(adev, i);
+		mmhub_v9_4_program_invalidation(adev, i);
+	}
+
+	return 0;
+}
+
+void mmhub_v9_4_gart_disable(struct amdgpu_device *adev)
+{
+	u32 tmp;
+	u32 i, j;
+
+	for (j = 0; j < MMHUB_NUM_INSTANCES; j++) {
+		/* Disable all tables */
+		for (i = 0; i < 16; i++)
+			WREG32_SOC15_OFFSET(MMHUB, 0,
+					    mmVML2VC0_VM_CONTEXT0_CNTL,
+					    j * MMHUB_INSTANCE_REGISTER_OFFSET +
+					    i, 0);
+
+		/* Setup TLB control */
+		tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+					  mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+					  j * MMHUB_INSTANCE_REGISTER_OFFSET);
+		tmp = REG_SET_FIELD(tmp, VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+				    ENABLE_L1_TLB, 0);
+		tmp = REG_SET_FIELD(tmp,
+				    VMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+				    ENABLE_ADVANCED_DRIVER_MODEL, 0);
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+				    mmVMSHAREDVC0_MC_VM_MX_L1_TLB_CNTL,
+				    j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+
+		/* Setup L2 cache */
+		tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
+					  j * MMHUB_INSTANCE_REGISTER_OFFSET);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_CNTL,
+				    ENABLE_L2_CACHE, 0);
+		WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL,
+				    j * MMHUB_INSTANCE_REGISTER_OFFSET, tmp);
+		WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2PF0_VM_L2_CNTL3,
+				    j * MMHUB_INSTANCE_REGISTER_OFFSET, 0);
+	}
+}
+
+/**
+ * mmhub_v9_4_set_fault_enable_default - update GART/VM fault handling
+ *
+ * @adev: amdgpu_device pointer
+ * @value: true redirects VM faults to the default page
+ */
+void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, bool value)
+{
+	u32 tmp;
+	int i;
+
+	for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+		tmp = RREG32_SOC15_OFFSET(MMHUB, 0,
+				mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+				i * MMHUB_INSTANCE_REGISTER_OFFSET);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+				    RANGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    value);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+				    PDE0_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    value);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+				    PDE1_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    value);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+				    PDE2_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    value);
+		tmp = REG_SET_FIELD(tmp,
+				    VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+				    TRANSLATE_FURTHER_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    value);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+				    NACK_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    value);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+				    DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    value);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+				    VALID_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    value);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+				    READ_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    value);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+				    WRITE_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    value);
+		tmp = REG_SET_FIELD(tmp, VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+				    EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT,
+				    value);
+		if (!value) {
+			tmp = REG_SET_FIELD(tmp,
+					    VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+					    CRASH_ON_NO_RETRY_FAULT, 1);
+			tmp = REG_SET_FIELD(tmp,
+					    VML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+					    CRASH_ON_RETRY_FAULT, 1);
+		}
+
+		WREG32_SOC15_OFFSET(MMHUB, 0,
+				    mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL,
+				    i * 
MMHUB_INSTANCE_REGISTER_OFFSET, tmp); + } +} + +void mmhub_v9_4_init(struct amdgpu_device *adev) +{ + struct amdgpu_vmhub *hub[MMHUB_NUM_INSTANCES] = + {&adev->vmhub[AMDGPU_MMHUB_0], &adev->vmhub[AMDGPU_MMHUB_1]}; + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + hub[i]->ctx0_ptb_addr_lo32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->ctx0_ptb_addr_hi32 = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_req = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_REQ) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_inv_eng0_ack = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_INVALIDATE_ENG0_ACK) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_context0_cntl = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2VC0_VM_CONTEXT0_CNTL) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_l2_pro_fault_status = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_STATUS) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + hub[i]->vm_l2_pro_fault_cntl = + SOC15_REG_OFFSET(MMHUB, 0, + mmVML2PF0_VM_L2_PROTECTION_FAULT_CNTL) + + i * MMHUB_INSTANCE_REGISTER_OFFSET; + } +} + +static void mmhub_v9_4_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data, def1, data1; + int i, j; + int dist = mmDAGB1_CNTL_MISC2 - mmDAGB0_CNTL_MISC2; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + def = data = RREG32_SOC15_OFFSET(MMHUB, 0, + mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) + data |= ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK; + else + data &= ~ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK; + + if (def != data) + WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET, data); + + for (j = 0; j < 5; j++) { + def1 = data1 = RREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_CNTL_MISC2, + i * MMHUB_INSTANCE_REGISTER_OFFSET + + j * dist); + if (enable && + (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { + data1 &= + ~(DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } else { + data1 |= + (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + } + + if (def1 != data1) + WREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_CNTL_MISC2, + i * MMHUB_INSTANCE_REGISTER_OFFSET + + j * dist, data1); + + if (i == 1 && j == 3) + break; + } + } +} + +static void mmhub_v9_4_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + def = data = RREG32_SOC15_OFFSET(MMHUB, 0, + mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) + data |= ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + else + data &= ~ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; + + if (def != data) + WREG32_SOC15_OFFSET(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG, + i * MMHUB_INSTANCE_REGISTER_OFFSET, data); + } +} + +int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, + enum 
amd_clockgating_state state) +{ + if (amdgpu_sriov_vf(adev)) + return 0; + + switch (adev->asic_type) { + case CHIP_ARCTURUS: + mmhub_v9_4_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + mmhub_v9_4_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; + default: + break; + } + + return 0; +} + +void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags) +{ + int data, data1; + + if (amdgpu_sriov_vf(adev)) + *flags = 0; + + /* AMD_CG_SUPPORT_MC_MGCG */ + data = RREG32_SOC15(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG); + + data1 = RREG32_SOC15(MMHUB, 0, mmATCL2_0_ATC_L2_MISC_CG); + + if ((data & ATCL2_0_ATC_L2_MISC_CG__ENABLE_MASK) && + !(data1 & (DAGB0_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK))) + *flags |= AMD_CG_SUPPORT_MC_MGCG; + + /* AMD_CG_SUPPORT_MC_LS */ + if (data & ATCL2_0_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_MC_LS; +} diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h 2019-08-31 15:01:11.849736167 -0500 @@ -0,0 +1,36 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __MMHUB_V9_4_H__ +#define __MMHUB_V9_4_H__ + +u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev); +int mmhub_v9_4_gart_enable(struct amdgpu_device *adev); +void mmhub_v9_4_gart_disable(struct amdgpu_device *adev); +void mmhub_v9_4_set_fault_enable_default(struct amdgpu_device *adev, + bool value); +void mmhub_v9_4_init(struct amdgpu_device *adev); +int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags); + +#endif diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c 2019-08-31 15:01:11.849736167 -0500 @@ -449,20 +449,6 @@ amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); } -static void xgpu_ai_init_reg_access_mode(struct amdgpu_device *adev) -{ - adev->virt.reg_access_mode = AMDGPU_VIRT_REG_ACCESS_LEGACY; - - /* Enable L1 security reg access mode by defaul, as non-security VF - * will no longer be supported. - */ - adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_RLC; - - adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_ACCESS_PSP_PRG_IH; - - adev->virt.reg_access_mode |= AMDGPU_VIRT_REG_SKIP_SEETING; -} - const struct amdgpu_virt_ops xgpu_ai_virt_ops = { .req_full_gpu = xgpu_ai_request_full_gpu_access, .rel_full_gpu = xgpu_ai_release_full_gpu_access, @@ -471,5 +457,4 @@ .trans_msg = xgpu_ai_mailbox_trans_msg, .get_pp_clk = xgpu_ai_get_pp_clk, .force_dpm_level = xgpu_ai_force_dpm_level, - .init_reg_access_mode = xgpu_ai_init_reg_access_mode, }; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi10_ih.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi10_ih.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi10_ih.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi10_ih.c 2019-08-31 15:01:11.849736167 -0500 @@ -21,7 +21,8 @@ * */ -#include +#include + #include "amdgpu.h" #include "amdgpu_ih.h" diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi10_reg_init.c 2019-08-31 15:01:11.849736167 -0500 @@ -29,20 +29,8 @@ int navi10_reg_base_init(struct amdgpu_device *adev) { - int r, i; + int i; - if (amdgpu_discovery) { - r = amdgpu_discovery_reg_base_init(adev); - if (r) { - DRM_WARN("failed to init reg base from ip discovery table, " - "fallback to legacy init method\n"); - goto legacy_init; - } - - return 0; - } - -legacy_init: for (i = 0 ; i < MAX_INSTANCE ; ++i) { adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi12_reg_init.c 2019-08-31 15:01:11.849736167 -0500 @@ -0,0 +1,53 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "nv.h"
+
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
+#include "navi12_ip_offset.h"
+
+int navi12_reg_base_init(struct amdgpu_device *adev)
+{
+	/* HW has more IP blocks; only initialize the blocks needed by the driver */
+	uint32_t i;
+	for (i = 0 ; i < MAX_INSTANCE ; ++i) {
+		adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+		adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i]));
+		adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i]));
+		adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
+		adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i]));
+		adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+		adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
+		adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i]));
+		adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
+		adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i]));
+		adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i]));
+		adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+		adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i]));
+		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
+		adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i]));
+	}
+	return 0;
+}
diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c
--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c	1969-12-31 18:00:00.000000000 -0600
+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/navi14_reg_init.c	2019-08-31 15:01:11.849736167 -0500
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "nv.h" + +#include "soc15_common.h" +#include "soc15_hw_ip.h" +#include "navi14_ip_offset.h" + +int navi14_reg_base_init(struct amdgpu_device *adev) +{ + int i; + + for (i = 0 ; i < MAX_INSTANCE ; ++i) { + adev->reg_offset[GC_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[HDP_HWIP][i] = (uint32_t *)(&(HDP_BASE.instance[i])); + adev->reg_offset[MMHUB_HWIP][i] = (uint32_t *)(&(MMHUB_BASE.instance[i])); + adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i])); + adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIF0_BASE.instance[i])); + adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i])); + adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i])); + adev->reg_offset[VCN_HWIP][i] = (uint32_t *)(&(UVD0_BASE.instance[i])); + adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i])); + adev->reg_offset[DCE_HWIP][i] = (uint32_t *)(&(DMU_BASE.instance[i])); + adev->reg_offset[OSSSYS_HWIP][i] = (uint32_t *)(&(OSSSYS_BASE.instance[i])); + adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(GC_BASE.instance[i])); + adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); + adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + } + + return 0; +} diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c 2019-08-31 15:01:11.849736167 -0500 @@ -92,7 +92,7 @@ } static void nbio_v2_3_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, - int doorbell_index) + int doorbell_index, int instance) { u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c 2019-08-31 15:01:11.849736167 -0500 @@ -91,6 +91,26 @@ WREG32(reg, doorbell_range); } +static void nbio_v7_0_vcn_doorbell_range(struct amdgpu_device *adev, bool 
use_doorbell,
+					int doorbell_index, int instance)
+{
+	u32 reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE);
+
+	u32 doorbell_range = RREG32(reg);
+
+	if (use_doorbell) {
+		doorbell_range = REG_SET_FIELD(doorbell_range,
+					       BIF_MMSCH0_DOORBELL_RANGE, OFFSET,
+					       doorbell_index);
+		doorbell_range = REG_SET_FIELD(doorbell_range,
+					       BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8);
+	} else
+		doorbell_range = REG_SET_FIELD(doorbell_range,
+					       BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0);
+
+	WREG32(reg, doorbell_range);
+}
+
 static void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev,
 					       bool enable)
 {
@@ -282,6 +302,7 @@
 	.hdp_flush = nbio_v7_0_hdp_flush,
 	.get_memsize = nbio_v7_0_get_memsize,
 	.sdma_doorbell_range = nbio_v7_0_sdma_doorbell_range,
+	.vcn_doorbell_range = nbio_v7_0_vcn_doorbell_range,
 	.enable_doorbell_aperture = nbio_v7_0_enable_doorbell_aperture,
 	.enable_doorbell_selfring_aperture = nbio_v7_0_enable_doorbell_selfring_aperture,
 	.ih_doorbell_range = nbio_v7_0_ih_doorbell_range,
diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c	2019-08-25 14:01:23.000000000 -0500
+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c	2019-08-31 15:01:11.849736167 -0500
@@ -31,6 +31,25 @@
 #define smnNBIF_MGCG_CTRL_LCLK	0x1013a21c
 
+/*
+ * These are the nbio v7_4_1 register masks. Define them here temporarily
+ * since the nbio v7_4_1 header is incomplete.
+ */
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK	0x00001000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK	0x00002000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK	0x00004000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK	0x00008000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK	0x00010000L
+#define GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK	0x00020000L
+
+#define mmBIF_MMSCH1_DOORBELL_RANGE		0x01dc
+#define mmBIF_MMSCH1_DOORBELL_RANGE_BASE_IDX	2
+//BIF_MMSCH1_DOORBELL_RANGE
+#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET__SHIFT	0x2
+#define BIF_MMSCH1_DOORBELL_RANGE__SIZE__SHIFT		0x10
+#define BIF_MMSCH1_DOORBELL_RANGE__OFFSET_MASK		0x00000FFCL
+#define BIF_MMSCH1_DOORBELL_RANGE__SIZE_MASK		0x001F0000L
+
 static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
 {
 	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
@@ -75,10 +94,24 @@
 static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance,
 					  bool use_doorbell, int doorbell_index, int doorbell_size)
 {
-	u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) :
-			SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE);
+	u32 reg, doorbell_range;
+
+	if (instance < 2)
+		reg = instance +
+			SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE);
+	else
+		/*
+		 * The register addresses of SDMA2~7 are not consecutive with
+		 * those of SDMA0~1; an extra offset of 4 dwords is needed.
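 * (Worked example, assuming dword-indexed register offsets as returned by
 *  SOC15_REG_OFFSET(): each dword is 4 bytes, so instance 2 resolves to
 *  0x3bc0 + (2 + 4) * 4 = 0x3bd8, matching the table below.)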
+ * + * BIF_SDMA0_DOORBELL_RANGE: 0x3bc0 + * BIF_SDMA1_DOORBELL_RANGE: 0x3bc4 + * BIF_SDMA2_DOORBELL_RANGE: 0x3bd8 + */ + reg = instance + 0x4 + + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE); - u32 doorbell_range = RREG32(reg); + doorbell_range = RREG32(reg); if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); @@ -89,6 +122,32 @@ WREG32(reg, doorbell_range); } +static void nbio_v7_4_vcn_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, + int doorbell_index, int instance) +{ + u32 reg; + u32 doorbell_range; + + if (instance) + reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH1_DOORBELL_RANGE); + else + reg = SOC15_REG_OFFSET(NBIO, 0, mmBIF_MMSCH0_DOORBELL_RANGE); + + doorbell_range = RREG32(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, OFFSET, + doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 8); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, + BIF_MMSCH0_DOORBELL_RANGE, SIZE, 0); + + WREG32(reg, doorbell_range); +} + static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { @@ -220,6 +279,12 @@ .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK, .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, + .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG0_MASK, + .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, + .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, + .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, + .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, + .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, }; static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev) @@ -261,6 +326,7 @@ .hdp_flush = nbio_v7_4_hdp_flush, .get_memsize = nbio_v7_4_get_memsize, .sdma_doorbell_range = nbio_v7_4_sdma_doorbell_range, + .vcn_doorbell_range = nbio_v7_4_vcn_doorbell_range, .enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture, .enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture, .ih_doorbell_range = nbio_v7_4_ih_doorbell_range, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nv.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nv.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nv.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nv.c 2019-08-31 15:01:12.265736204 -0500 @@ -23,7 +23,8 @@ #include #include #include -#include +#include + #include "amdgpu.h" #include "amdgpu_atombios.h" #include "amdgpu_ih.h" @@ -289,6 +290,18 @@ return ret; } + +static enum amd_reset_method +nv_asic_reset_method(struct amdgpu_device *adev) +{ + struct smu_context *smu = &adev->smu; + + if (smu_baco_is_support(smu)) + return AMD_RESET_METHOD_BACO; + else + return AMD_RESET_METHOD_MODE1; +} + static int nv_asic_reset(struct amdgpu_device *adev) { @@ -303,10 +316,13 @@ int ret = 0; struct smu_context *smu = &adev->smu; - if (smu_baco_is_support(smu)) + if (nv_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { + amdgpu_inc_vram_lost(adev); ret = smu_baco_reset(smu); - else + } else { + amdgpu_inc_vram_lost(adev); ret = nv_asic_mode1_reset(adev); + } return ret; } @@ -363,23 +379,55 @@ .funcs = &nv_common_ip_funcs, }; -int nv_set_ip_blocks(struct amdgpu_device *adev) +static int nv_reg_base_init(struct amdgpu_device *adev) { - /* Set IP register base before any HW register access */ + int r; + + if 
(amdgpu_discovery) { + r = amdgpu_discovery_reg_base_init(adev); + if (r) { + DRM_WARN("failed to init reg base from ip discovery table, " + "fallback to legacy init method\n"); + goto legacy_init; + } + + return 0; + } + +legacy_init: switch (adev->asic_type) { case CHIP_NAVI10: navi10_reg_base_init(adev); break; + case CHIP_NAVI14: + navi14_reg_base_init(adev); + break; + case CHIP_NAVI12: + navi12_reg_base_init(adev); + break; default: return -EINVAL; } + return 0; +} + +int nv_set_ip_blocks(struct amdgpu_device *adev) +{ + int r; + + /* Set IP register base before any HW register access */ + r = nv_reg_base_init(adev); + if (r) + return r; + adev->nbio_funcs = &nbio_v2_3_funcs; adev->nbio_funcs->detect_hw_virt(adev); switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: amdgpu_device_ip_block_add(adev, &nv_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); @@ -402,6 +450,27 @@ if (adev->enable_mes) amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); break; + case CHIP_NAVI12: + amdgpu_device_ip_block_add(adev, &nv_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#endif + amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v5_0_ip_block); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && + is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + break; default: return -EINVAL; } @@ -496,6 +565,7 @@ .read_bios_from_rom = &nv_read_bios_from_rom, .read_register = &nv_read_register, .reset = &nv_asic_reset, + .reset_method = &nv_asic_reset_method, .set_vga_state = &nv_vga_set_state, .get_xclk = &nv_get_xclk, .set_uvd_clocks = &nv_set_uvd_clocks, @@ -511,7 +581,6 @@ static int nv_common_early_init(void *handle) { - bool psp_enabled = false; struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->smc_rreg = NULL; @@ -528,10 +597,6 @@ adev->asic_funcs = &nv_asic_funcs; - if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) && - (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) - psp_enabled = true; - adev->rev_id = nv_get_rev_id(adev); adev->external_rev_id = 0xff; switch (adev->asic_type) { @@ -555,6 +620,46 @@ AMD_PG_SUPPORT_ATHUB; adev->external_rev_id = adev->rev_id + 0x1; break; + case CHIP_NAVI14: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; + adev->external_rev_id = adev->rev_id + 20; + break; + case CHIP_NAVI12: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + 
AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_VCN_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_ATHUB; + adev->external_rev_id = adev->rev_id + 0xa; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -747,6 +852,8 @@ switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: adev->nbio_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); adev->nbio_funcs->update_medium_grain_light_sleep(adev, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nv.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nv.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/nv.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/nv.h 2019-08-31 15:01:11.849736167 -0500 @@ -30,4 +30,6 @@ u32 me, u32 pipe, u32 queue, u32 vmid); int nv_set_ip_blocks(struct amdgpu_device *adev); int navi10_reg_base_init(struct amdgpu_device *adev); +int navi14_reg_base_init(struct amdgpu_device *adev); +int navi12_reg_base_init(struct amdgpu_device *adev); #endif diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h 2019-08-31 15:01:11.849736167 -0500 @@ -233,8 +233,15 @@ GFX_FW_TYPE_RLCP_CAM = 46, /* RLCP CAM NV */ GFX_FW_TYPE_RLC_SPP_CAM_EXT = 47, /* RLC SPP CAM EXT NV */ GFX_FW_TYPE_RLX6_DRAM_BOOT = 48, /* RLX6 DRAM BOOT NV */ - GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV */ - GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV */ + GFX_FW_TYPE_VCN0_RAM = 49, /* VCN_RAM NV + RN */ + GFX_FW_TYPE_VCN1_RAM = 50, /* VCN_RAM NV + RN */ + GFX_FW_TYPE_DMUB = 51, /* DMUB RN */ + GFX_FW_TYPE_SDMA2 = 52, /* SDMA2 MI */ + GFX_FW_TYPE_SDMA3 = 53, /* SDMA3 MI */ + GFX_FW_TYPE_SDMA4 = 54, /* SDMA4 MI */ + GFX_FW_TYPE_SDMA5 = 55, /* SDMA5 MI */ + GFX_FW_TYPE_SDMA6 = 56, /* SDMA6 MI */ + GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */ GFX_FW_TYPE_MAX }; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c 2019-08-31 15:01:11.850736167 -0500 @@ -190,7 +190,6 @@ } static int psp_v10_0_cmd_submit(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index) { diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 2019-08-31 15:01:11.850736167 -0500 @@ -43,6 +43,12 @@ MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); MODULE_FIRMWARE("amdgpu/navi10_sos.bin"); MODULE_FIRMWARE("amdgpu/navi10_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sos.bin"); +MODULE_FIRMWARE("amdgpu/navi14_asd.bin"); +MODULE_FIRMWARE("amdgpu/navi12_sos.bin"); +MODULE_FIRMWARE("amdgpu/navi12_asd.bin"); 
+MODULE_FIRMWARE("amdgpu/arcturus_sos.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_asd.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 @@ -60,6 +66,7 @@ int err = 0; const struct psp_firmware_header_v1_0 *sos_hdr; const struct psp_firmware_header_v1_1 *sos_hdr_v1_1; + const struct psp_firmware_header_v1_2 *sos_hdr_v1_2; const struct psp_firmware_header_v1_0 *asd_hdr; const struct ta_firmware_header_v1_0 *ta_hdr; @@ -72,6 +79,15 @@ case CHIP_NAVI10: chip_name = "navi10"; break; + case CHIP_NAVI14: + chip_name = "navi14"; + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; default: BUG(); } @@ -107,6 +123,12 @@ adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + le32_to_cpu(sos_hdr_v1_1->kdb_offset_bytes); } + if (sos_hdr->header.header_version_minor == 2) { + sos_hdr_v1_2 = (const struct psp_firmware_header_v1_2 *)adev->psp.sos_fw->data; + adev->psp.kdb_bin_size = le32_to_cpu(sos_hdr_v1_2->kdb_size_bytes); + adev->psp.kdb_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(sos_hdr_v1_2->kdb_offset_bytes); + } break; default: dev_err(adev->dev, @@ -158,6 +180,9 @@ } break; case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + case CHIP_ARCTURUS: break; default: BUG(); @@ -473,7 +498,6 @@ } static int psp_v11_0_cmd_submit(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index) { diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c 2019-08-31 15:01:11.850736167 -0500 @@ -0,0 +1,565 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include "amdgpu.h"
+#include "amdgpu_psp.h"
+#include "amdgpu_ucode.h"
+#include "soc15_common.h"
+#include "psp_v12_0.h"
+
+#include "mp/mp_12_0_0_offset.h"
+#include "mp/mp_12_0_0_sh_mask.h"
+#include "gc/gc_9_0_offset.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "nbio/nbio_7_4_offset.h"
+
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+
+MODULE_FIRMWARE("amdgpu/renoir_asd.bin");
+/* address block */
+#define smnMP1_FIRMWARE_FLAGS 0x3010024
+
+static int psp_v12_0_init_microcode(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	const char *chip_name;
+	char fw_name[30];
+	int err = 0;
+	const struct psp_firmware_header_v1_0 *asd_hdr;
+
+	DRM_DEBUG("\n");
+
+	switch (adev->asic_type) {
+	case CHIP_RENOIR:
+		chip_name = "renoir";
+		break;
+	default:
+		BUG();
+	}
+
+	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
+	err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
+	if (err)
+		goto out1;
+
+	err = amdgpu_ucode_validate(adev->psp.asd_fw);
+	if (err)
+		goto out1;
+
+	asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data;
+	adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version);
+	adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->ucode_feature_version);
+	adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes);
+	adev->psp.asd_start_addr = (uint8_t *)asd_hdr +
+				le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
+
+	return 0;
+
+out1:
+	release_firmware(adev->psp.asd_fw);
+	adev->psp.asd_fw = NULL;
+
+	return err;
+}
+
+static int psp_v12_0_bootloader_load_sysdrv(struct psp_context *psp)
+{
+	int ret;
+	uint32_t psp_gfxdrv_command_reg = 0;
+	struct amdgpu_device *adev = psp->adev;
+	uint32_t sol_reg;
+
+	/* Check the sOS sign-of-life register to confirm the sys driver and
+	 * sOS have already been loaded.
+	 */
+	sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
+	if (sol_reg) {
+		psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
+		printk("sos fw version = 0x%x.\n", psp->sos_fw_version);
+		return 0;
+	}
+
+	/* Wait for the bootloader to signal that it is ready: bit 31 of C2PMSG_35 set to 1 */
+	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+			   0x80000000, 0x80000000, false);
+	if (ret)
+		return ret;
+
+	memset(psp->fw_pri_buf, 0, PSP_1_MEG);
+
+	/* Copy PSP System Driver binary to memory */
+	memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
+
+	/* Provide the sys driver to bootloader */
+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
+		     (uint32_t)(psp->fw_pri_mc_addr >> 20));
+	psp_gfxdrv_command_reg = 1 << 16;
+	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
+		     psp_gfxdrv_command_reg);
+
+	/* there might be handshake issue with hardware which needs delay */
+	mdelay(20);
+
+	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+			   0x80000000, 0x80000000, false);
+
+	return ret;
+}
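Both bootloader load routines (sysdrv above, sOS below) share the same mailbox handshake: stage the image in the PSP private buffer, hand its address to the bootloader through C2PMSG_36, write a command to C2PMSG_35, then poll for the ready bit. A condensed sketch of that shared shape; the helper and its cmd parameter (standing for the 1 << 16 / 2 << 16 command codes used here) are illustrative, not part of the change:

static int psp_v12_0_load_handshake(struct psp_context *psp, uint32_t cmd)
{
	struct amdgpu_device *adev = psp->adev;

	/* tell the bootloader where the staged image lives (MC address >> 20) */
	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
		     (uint32_t)(psp->fw_pri_mc_addr >> 20));
	/* issue the load command */
	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, cmd);

	/* poll until the bootloader raises the ready bit (bit 31) again */
	return psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
			    0x80000000, 0x80000000, false);
}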
+ */ + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) + return 0; + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy Secure OS binary to PSP memory */ + memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); + + /* Provide the PSP secure OS to bootloader */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = 2 << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), + 0, true); + + return ret; +} + +static void psp_v12_0_reroute_ih(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + uint32_t tmp; + + /* Change IH ring for VMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + + /* Change IH ring for UMC */ + tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); + + mdelay(20); + psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); +} + +static int psp_v12_0_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring; + struct amdgpu_device *adev = psp->adev; + + psp_v12_0_reroute_ih(psp); + + ring = &psp->km_ring; + + ring->ring_type = ring_type; + + /* allocate 4k Page of Local Frame Buffer memory for ring */ + ring->ring_size = 0x1000; + ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + if (ret) { + ring->ring_size = 0; + return ret; + } + + return 0; +} + +static bool psp_v12_0_support_vmr_ring(struct psp_context *psp) +{ + if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045) + return true; + return false; +} + +static int psp_v12_0_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (psp_v12_0_support_vmr_ring(psp)) { + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + 
GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + +static int psp_v12_0_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + /* Write the ring destroy command*/ + if (psp_v12_0_support_vmr_ring(psp)) + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) */ + if (psp_v12_0_support_vmr_ring(psp)) + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + + return ret; +} + +static int psp_v12_0_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + ret = psp_v12_0_ring_stop(psp, ring_type); + if (ret) + DRM_ERROR("Fail to stop psp ring\n"); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +static int psp_v12_0_cmd_submit(struct psp_context *psp, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, + int index) +{ + unsigned int psp_write_ptr_reg = 0; + struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem; + struct psp_ring *ring = &psp->km_ring; + struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; + struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + + ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; + struct amdgpu_device *adev = psp->adev; + uint32_t ring_size_dw = ring->ring_size / 4; + uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; + + /* KM (GPCOM) prepare write pointer */ + if (psp_v12_0_support_vmr_ring(psp)) + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + + /* Update KM RB frame pointer to new frame */ + /* write_frame ptr increments by size of rb_frame in bytes */ + /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ + if ((psp_write_ptr_reg % ring_size_dw) == 0) + write_frame = ring_buffer_start; + else + 
write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); + /* Check invalid write_frame ptr address */ + if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { + DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", + ring_buffer_start, ring_buffer_end, write_frame); + DRM_ERROR("write_frame is pointing to address out of bounds\n"); + return -EINVAL; + } + + /* Initialize KM RB frame */ + memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); + + /* Update KM RB frame */ + write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); + write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); + write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); + write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); + write_frame->fence_value = index; + + /* Update the write Pointer in DWORDs */ + psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; + if (psp_v12_0_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); + + return 0; +} + +static int +psp_v12_0_sram_map(struct amdgpu_device *adev, + unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) +{ + int ret = 0; + + switch (ucode_id) { +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SMC: + *sram_offset = 0; + *sram_addr_reg_offset = 0; + *sram_data_reg_offset = 0; + break; +#endif + + case AMDGPU_UCODE_ID_CP_CE: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_PFP: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_ME: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC1: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC2: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_RLC_G: + *sram_offset = 0x2000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_SDMA0: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA); + break; + +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SDMA1: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_UVD: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_VCE: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; +#endif + + case AMDGPU_UCODE_ID_MAXIMUM: + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static bool 
psp_v12_0_compare_sram_data(struct psp_context *psp,
+			    struct amdgpu_firmware_info *ucode,
+			    enum AMDGPU_UCODE_ID ucode_type)
+{
+	int err = 0;
+	unsigned int fw_sram_reg_val = 0;
+	unsigned int fw_sram_addr_reg_offset = 0;
+	unsigned int fw_sram_data_reg_offset = 0;
+	unsigned int ucode_size;
+	uint32_t *ucode_mem = NULL;
+	struct amdgpu_device *adev = psp->adev;
+
+	err = psp_v12_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset,
+				&fw_sram_data_reg_offset, ucode_type);
+	if (err)
+		return false;
+
+	WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val);
+
+	ucode_size = ucode->ucode_size;
+	ucode_mem = (uint32_t *)ucode->kaddr;
+	while (ucode_size) {
+		fw_sram_reg_val = RREG32(fw_sram_data_reg_offset);
+
+		if (*ucode_mem != fw_sram_reg_val)
+			return false;
+
+		ucode_mem++;
+		/* 4 bytes */
+		ucode_size -= 4;
+	}
+
+	return true;
+}
+
+static int psp_v12_0_mode1_reset(struct psp_context *psp)
+{
+	int ret;
+	uint32_t offset;
+	struct amdgpu_device *adev = psp->adev;
+
+	offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64);
+
+	ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false);
+
+	if (ret) {
+		DRM_INFO("psp is not working correctly before mode1 reset!\n");
+		return -EINVAL;
+	}
+
+	/* send the mode 1 reset command */
+	WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
+
+	msleep(500);
+
+	offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
+
+	ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false);
+
+	if (ret) {
+		DRM_INFO("psp mode 1 reset failed!\n");
+		return -EINVAL;
+	}
+
+	DRM_INFO("psp mode1 reset succeeded\n");
+
+	return 0;
+}
+
+static const struct psp_funcs psp_v12_0_funcs = {
+	.init_microcode = psp_v12_0_init_microcode,
+	.bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv,
+	.bootloader_load_sos = psp_v12_0_bootloader_load_sos,
+	.ring_init = psp_v12_0_ring_init,
+	.ring_create = psp_v12_0_ring_create,
+	.ring_stop = psp_v12_0_ring_stop,
+	.ring_destroy = psp_v12_0_ring_destroy,
+	.cmd_submit = psp_v12_0_cmd_submit,
+	.compare_sram_data = psp_v12_0_compare_sram_data,
+	.mode1_reset = psp_v12_0_mode1_reset,
+};
+
+void psp_v12_0_set_psp_funcs(struct psp_context *psp)
+{
+	psp->funcs = &psp_v12_0_funcs;
+}
diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h
--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v12_0.h	2019-08-31 15:01:12.265736204 -0500
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __PSP_V12_0_H__ +#define __PSP_V12_0_H__ + +#include "amdgpu_psp.h" + +void psp_v12_0_set_psp_funcs(struct psp_context *psp); + +#endif diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c 2019-08-31 15:01:11.850736167 -0500 @@ -411,7 +411,6 @@ } static int psp_v3_1_cmd_submit(struct psp_context *psp, - struct amdgpu_firmware_info *ucode, uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index) { @@ -636,7 +635,7 @@ static bool psp_v3_1_support_vmr_ring(struct psp_context *psp) { - if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version >= 0x80455) + if (amdgpu_sriov_vf(psp->adev)) return true; return false; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 2019-08-31 15:01:11.850736167 -0500 @@ -34,6 +34,18 @@ #include "sdma0/sdma0_4_2_sh_mask.h" #include "sdma1/sdma1_4_2_offset.h" #include "sdma1/sdma1_4_2_sh_mask.h" +#include "sdma2/sdma2_4_2_2_offset.h" +#include "sdma2/sdma2_4_2_2_sh_mask.h" +#include "sdma3/sdma3_4_2_2_offset.h" +#include "sdma3/sdma3_4_2_2_sh_mask.h" +#include "sdma4/sdma4_4_2_2_offset.h" +#include "sdma4/sdma4_4_2_2_sh_mask.h" +#include "sdma5/sdma5_4_2_2_offset.h" +#include "sdma5/sdma5_4_2_2_sh_mask.h" +#include "sdma6/sdma6_4_2_2_offset.h" +#include "sdma6/sdma6_4_2_2_sh_mask.h" +#include "sdma7/sdma7_4_2_2_offset.h" +#include "sdma7/sdma7_4_2_2_sh_mask.h" #include "hdp/hdp_4_0_offset.h" #include "sdma0/sdma0_4_1_default.h" @@ -55,6 +67,8 @@ MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); MODULE_FIRMWARE("amdgpu/picasso_sdma.bin"); MODULE_FIRMWARE("amdgpu/raven2_sdma.bin"); +MODULE_FIRMWARE("amdgpu/arcturus_sdma.bin"); +MODULE_FIRMWARE("amdgpu/renoir_sdma.bin"); #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L @@ -202,25 +216,132 @@ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001) }; +static const struct soc15_reg_golden golden_settings_sdma_arct[] = +{ + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA2, 0, mmSDMA2_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + 
SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA3, 0, mmSDMA3_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA4, 0, mmSDMA4_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA5, 0, mmSDMA5_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA6, 0, mmSDMA6_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA7, 0, mmSDMA7_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002) +}; + +static const struct soc15_reg_golden golden_settings_sdma_4_3[] = { + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831f07), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00000002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_POWER_CNTL, 0x003fff07, 0x40000051), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), +}; + static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) { - return ( 0 == instance ? 
(adev->reg_offset[SDMA0_HWIP][0][0] + offset) : - (adev->reg_offset[SDMA1_HWIP][0][0] + offset)); + switch (instance) { + case 0: + return (adev->reg_offset[SDMA0_HWIP][0][0] + offset); + case 1: + return (adev->reg_offset[SDMA1_HWIP][0][0] + offset); + case 2: + return (adev->reg_offset[SDMA2_HWIP][0][1] + offset); + case 3: + return (adev->reg_offset[SDMA3_HWIP][0][1] + offset); + case 4: + return (adev->reg_offset[SDMA4_HWIP][0][1] + offset); + case 5: + return (adev->reg_offset[SDMA5_HWIP][0][1] + offset); + case 6: + return (adev->reg_offset[SDMA6_HWIP][0][1] + offset); + case 7: + return (adev->reg_offset[SDMA7_HWIP][0][1] + offset); + default: + break; + } + return 0; +} + +static unsigned sdma_v4_0_seq_to_irq_id(int seq_num) +{ + switch (seq_num) { + case 0: + return SOC15_IH_CLIENTID_SDMA0; + case 1: + return SOC15_IH_CLIENTID_SDMA1; + case 2: + return SOC15_IH_CLIENTID_SDMA2; + case 3: + return SOC15_IH_CLIENTID_SDMA3; + case 4: + return SOC15_IH_CLIENTID_SDMA4; + case 5: + return SOC15_IH_CLIENTID_SDMA5; + case 6: + return SOC15_IH_CLIENTID_SDMA6; + case 7: + return SOC15_IH_CLIENTID_SDMA7; + default: + break; + } + return -EINVAL; +} + +static int sdma_v4_0_irq_id_to_seq(unsigned client_id) +{ + switch (client_id) { + case SOC15_IH_CLIENTID_SDMA0: + return 0; + case SOC15_IH_CLIENTID_SDMA1: + return 1; + case SOC15_IH_CLIENTID_SDMA2: + return 2; + case SOC15_IH_CLIENTID_SDMA3: + return 3; + case SOC15_IH_CLIENTID_SDMA4: + return 4; + case SOC15_IH_CLIENTID_SDMA5: + return 5; + case SOC15_IH_CLIENTID_SDMA6: + return 6; + case SOC15_IH_CLIENTID_SDMA7: + return 7; + default: + break; + } + return -EINVAL; } static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: - if (!amdgpu_virt_support_skip_setting(adev)) { - soc15_program_register_sequence(adev, - golden_settings_sdma_4, - ARRAY_SIZE(golden_settings_sdma_4)); - soc15_program_register_sequence(adev, - golden_settings_sdma_vg10, - ARRAY_SIZE(golden_settings_sdma_vg10)); - } + soc15_program_register_sequence(adev, + golden_settings_sdma_4, + ARRAY_SIZE(golden_settings_sdma_4)); + soc15_program_register_sequence(adev, + golden_settings_sdma_vg10, + ARRAY_SIZE(golden_settings_sdma_vg10)); break; case CHIP_VEGA12: soc15_program_register_sequence(adev, @@ -241,6 +362,11 @@ golden_settings_sdma1_4_2, ARRAY_SIZE(golden_settings_sdma1_4_2)); break; + case CHIP_ARCTURUS: + soc15_program_register_sequence(adev, + golden_settings_sdma_arct, + ARRAY_SIZE(golden_settings_sdma_arct)); + break; case CHIP_RAVEN: soc15_program_register_sequence(adev, golden_settings_sdma_4_1, @@ -254,11 +380,53 @@ golden_settings_sdma_rv1, ARRAY_SIZE(golden_settings_sdma_rv1)); break; + case CHIP_RENOIR: + soc15_program_register_sequence(adev, + golden_settings_sdma_4_3, + ARRAY_SIZE(golden_settings_sdma_4_3)); + break; default: break; } } +static int sdma_v4_0_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) +{ + int err = 0; + const struct sdma_firmware_header_v1_0 *hdr; + + err = amdgpu_ucode_validate(sdma_inst->fw); + if (err) + return err; + + hdr = (const struct sdma_firmware_header_v1_0 *)sdma_inst->fw->data; + sdma_inst->fw_version = le32_to_cpu(hdr->header.ucode_version); + sdma_inst->feature_version = le32_to_cpu(hdr->ucode_feature_version); + + if (sdma_inst->feature_version >= 20) + sdma_inst->burst_nop = true; + + return 0; +} + +static void sdma_v4_0_destroy_inst_ctx(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if 
(adev->sdma.instance[i].fw != NULL) + release_firmware(adev->sdma.instance[i].fw); + + /* arcturus shares the same FW memory across + all SDMA instances */ + if (adev->asic_type == CHIP_ARCTURUS) + break; + } + + memset((void*)adev->sdma.instance, 0, + sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); +} + /** * sdma_v4_0_init_microcode - load ucode images from disk * @@ -278,7 +446,6 @@ int err = 0, i; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; - const struct sdma_firmware_header_v1_0 *hdr; DRM_DEBUG("\n"); @@ -300,30 +467,52 @@ else chip_name = "raven"; break; + case CHIP_ARCTURUS: + chip_name = "arcturus"; + break; + case CHIP_RENOIR: + chip_name = "renoir"; + break; default: BUG(); } - for (i = 0; i < adev->sdma.num_instances; i++) { - if (i == 0) - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); - else - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma1.bin", chip_name); - err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); - if (err) - goto out; - err = amdgpu_ucode_validate(adev->sdma.instance[i].fw); - if (err) - goto out; - hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; - adev->sdma.instance[i].fw_version = le32_to_cpu(hdr->header.ucode_version); - adev->sdma.instance[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); - if (adev->sdma.instance[i].feature_version >= 20) - adev->sdma.instance[i].burst_nop = true; - DRM_DEBUG("psp_load == '%s'\n", - adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + + err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); + if (err) + goto out; + + err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[0]); + if (err) + goto out; + + for (i = 1; i < adev->sdma.num_instances; i++) { + if (adev->asic_type == CHIP_ARCTURUS) { + /* Arcturus will leverage the same FW memory + for every SDMA instance */ + memcpy((void*)&adev->sdma.instance[i], + (void*)&adev->sdma.instance[0], + sizeof(struct amdgpu_sdma_instance)); + } + else { + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); + + err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); + if (err) + goto out; + + err = sdma_v4_0_init_inst_ctx(&adev->sdma.instance[i]); + if (err) + goto out; + } + } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + DRM_DEBUG("psp_load == '%s'\n", + adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ?
"true" : "false"); + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + for (i = 0; i < adev->sdma.num_instances; i++) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; info->fw = adev->sdma.instance[i].fw; @@ -332,13 +521,11 @@ ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); } } + out: if (err) { DRM_ERROR("sdma_v4_0: Failed to load firmware \"%s\"\n", fw_name); - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - } + sdma_v4_0_destroy_inst_ctx(adev); } return err; } @@ -561,10 +748,7 @@ u32 ref_and_mask = 0; const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg; - if (ring->me == 0) - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0; - else - ref_and_mask = nbio_hf_reg->ref_and_mask_sdma1; + ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me; sdma_v4_0_wait_reg_mem(ring, 0, 1, adev->nbio_funcs->get_hdp_flush_done_offset(adev), @@ -620,26 +804,27 @@ */ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].ring; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].ring; + struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; - int i; + int i, unset = 0; + + for (i = 0; i < adev->sdma.num_instances; i++) { + sdma[i] = &adev->sdma.instance[i].ring; - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) + if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) { amdgpu_ttm_set_buffer_funcs_status(adev, false); + unset = 1; + } - for (i = 0; i < adev->sdma.num_instances; i++) { rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); - } - sdma0->sched.ready = false; - sdma1->sched.ready = false; + sdma[i]->sched.ready = false; + } } /** @@ -663,16 +848,20 @@ */ static void sdma_v4_0_page_stop(struct amdgpu_device *adev) { - struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page; - struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page; + struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES]; u32 rb_cntl, ib_cntl; int i; - - if ((adev->mman.buffer_funcs_ring == sdma0) || - (adev->mman.buffer_funcs_ring == sdma1)) - amdgpu_ttm_set_buffer_funcs_status(adev, false); + bool unset = false; for (i = 0; i < adev->sdma.num_instances; i++) { + sdma[i] = &adev->sdma.instance[i].page; + + if ((adev->mman.buffer_funcs_ring == sdma[i]) && + (unset == false)) { + amdgpu_ttm_set_buffer_funcs_status(adev, false); + unset = true; + } + rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 0); @@ -681,10 +870,9 @@ ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 0); WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); - } - sdma0->sched.ready = false; - sdma1->sched.ready = false; + sdma[i]->sched.ready = false; + } } /** @@ -1018,6 +1206,7 @@ switch (adev->asic_type) { case CHIP_RAVEN: + case CHIP_RENOIR: sdma_v4_1_init_power_gating(adev); sdma_v4_1_update_power_gating(adev, true); break; @@ -1473,8 +1662,10 @@ struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - if (adev->asic_type == CHIP_RAVEN) + if (adev->asic_type == CHIP_RAVEN || adev->asic_type == 
CHIP_RENOIR) adev->sdma.num_instances = 1; + else if (adev->asic_type == CHIP_ARCTURUS) + adev->sdma.num_instances = 8; else adev->sdma.num_instances = 2; @@ -1499,6 +1690,7 @@ } static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, + struct ras_err_data *err_data, struct amdgpu_iv_entry *entry); static int sdma_v4_0_late_init(void *handle) @@ -1518,7 +1710,7 @@ .sub_block_index = 0, .name = "sdma", }; - int r; + int r, i; if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) { amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0); @@ -1575,14 +1767,11 @@ if (r) goto sysfs; resume: - r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); - if (r) - goto irq; - - r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1); - if (r) { - amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); - goto irq; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); + if (r) + goto irq; } return 0; @@ -1606,28 +1795,22 @@ struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; - - /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_TRAP, - &adev->sdma.trap_irq); - if (r) - return r; - - /* SDMA SRAM ECC event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, SDMA0_4_0__SRCID__SDMA_SRAM_ECC, - &adev->sdma.ecc_irq); - if (r) - return r; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i), + SDMA0_4_0__SRCID__SDMA_TRAP, + &adev->sdma.trap_irq); + if (r) + return r; + } /* SDMA SRAM ECC event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, SDMA1_4_0__SRCID__SDMA_SRAM_ECC, - &adev->sdma.ecc_irq); - if (r) - return r; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = amdgpu_irq_add_id(adev, sdma_v4_0_seq_to_irq_id(i), + SDMA0_4_0__SRCID__SDMA_SRAM_ECC, + &adev->sdma.ecc_irq); + if (r) + return r; + } for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; @@ -1641,11 +1824,8 @@ ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1; sprintf(ring->name, "sdma%d", i); - r = amdgpu_ring_init(adev, ring, 1024, - &adev->sdma.trap_irq, - (i == 0) ? - AMDGPU_SDMA_IRQ_INSTANCE0 : - AMDGPU_SDMA_IRQ_INSTANCE1); + r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); if (r) return r; @@ -1663,9 +1843,7 @@ sprintf(ring->name, "page%d", i); r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq, - (i == 0) ? 
- AMDGPU_SDMA_IRQ_INSTANCE0 : - AMDGPU_SDMA_IRQ_INSTANCE1); + AMDGPU_SDMA_IRQ_INSTANCE0 + i); if (r) return r; } @@ -1701,10 +1879,7 @@ amdgpu_ring_fini(&adev->sdma.instance[i].page); } - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - } + sdma_v4_0_destroy_inst_ctx(adev); return 0; } @@ -1718,7 +1893,8 @@ adev->powerplay.pp_funcs->set_powergating_by_smu) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); - sdma_v4_0_init_golden_registers(adev); + if (!amdgpu_sriov_vf(adev)) + sdma_v4_0_init_golden_registers(adev); r = sdma_v4_0_start(adev); @@ -1728,12 +1904,15 @@ static int sdma_v4_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i; if (amdgpu_sriov_vf(adev)) return 0; - amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0); - amdgpu_irq_put(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE1); + for (i = 0; i < adev->sdma.num_instances; i++) { + amdgpu_irq_put(adev, &adev->sdma.ecc_irq, + AMDGPU_SDMA_IRQ_INSTANCE0 + i); + } sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); @@ -1776,15 +1955,17 @@ static int sdma_v4_0_wait_for_idle(void *handle) { - unsigned i; - u32 sdma0, sdma1; + unsigned i, j; + u32 sdma[AMDGPU_MAX_SDMA_INSTANCES]; struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { - sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG); - sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG); - - if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) + for (j = 0; j < adev->sdma.num_instances; j++) { + sdma[j] = RREG32_SDMA(j, mmSDMA0_STATUS_REG); + if (!(sdma[j] & SDMA0_STATUS_REG__IDLE_MASK)) + break; + } + if (j == adev->sdma.num_instances) return 0; udelay(1); } @@ -1820,17 +2001,7 @@ uint32_t instance; DRM_DEBUG("IH: SDMA trap\n"); - switch (entry->client_id) { - case SOC15_IH_CLIENTID_SDMA0: - instance = 0; - break; - case SOC15_IH_CLIENTID_SDMA1: - instance = 1; - break; - default: - return 0; - } - + instance = sdma_v4_0_irq_id_to_seq(entry->client_id); switch (entry->ring_id) { case 0: amdgpu_fence_process(&adev->sdma.instance[instance].ring); @@ -1851,20 +2022,15 @@ } static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, + struct ras_err_data *err_data, struct amdgpu_iv_entry *entry) { - uint32_t instance, err_source; + uint32_t err_source; + int instance; - switch (entry->client_id) { - case SOC15_IH_CLIENTID_SDMA0: - instance = 0; - break; - case SOC15_IH_CLIENTID_SDMA1: - instance = 1; - break; - default: + instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) return 0; - } switch (entry->src_id) { case SDMA0_4_0__SRCID__SDMA_SRAM_ECC: @@ -1881,7 +2047,7 @@ amdgpu_ras_reset_gpu(adev, 0); - return AMDGPU_RAS_UE; + return AMDGPU_RAS_SUCCESS; } static int sdma_v4_0_process_ecc_irq(struct amdgpu_device *adev, @@ -1910,16 +2076,9 @@ DRM_ERROR("Illegal instruction in SDMA command stream\n"); - switch (entry->client_id) { - case SOC15_IH_CLIENTID_SDMA0: - instance = 0; - break; - case SOC15_IH_CLIENTID_SDMA1: - instance = 1; - break; - default: + instance = sdma_v4_0_irq_id_to_seq(entry->client_id); + if (instance < 0) return 0; - } switch (entry->ring_id) { case 0: @@ -1936,14 +2095,10 @@ { u32 sdma_edc_config; - u32 reg_offset = (type == AMDGPU_SDMA_IRQ_INSTANCE0) ? 
- sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_EDC_CONFIG) : - sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_EDC_CONFIG); - - sdma_edc_config = RREG32(reg_offset); + sdma_edc_config = RREG32_SDMA(type, mmSDMA0_EDC_CONFIG); sdma_edc_config = REG_SET_FIELD(sdma_edc_config, SDMA0_EDC_CONFIG, ECC_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32(reg_offset, sdma_edc_config); + WREG32_SDMA(type, mmSDMA0_EDC_CONFIG, sdma_edc_config); return 0; } @@ -1953,61 +2108,35 @@ bool enable) { uint32_t data, def; + int i; if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { - /* enable sdma0 clock gating */ - def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); - data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); - data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); + for (i = 0; i < adev->sdma.num_instances; i++) { + def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL); + data &= ~(SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), data); + WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data); } } else { - /* disable sdma0 clock gating */ - def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL)); - data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); - - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data); - - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL)); - data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE5_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE4_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE3_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE2_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE1_MASK | - SDMA1_CLK_CTRL__SOFT_OVERRIDE0_MASK); + for (i = 0; i < adev->sdma.num_instances; i++) { + def = data = RREG32_SDMA(i, mmSDMA0_CLK_CTRL); + data |= (SDMA0_CLK_CTRL__SOFT_OVERRIDE7_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE6_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE5_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE4_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE3_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE2_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE1_MASK | + SDMA0_CLK_CTRL__SOFT_OVERRIDE0_MASK); if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL), 
data); + WREG32_SDMA(i, mmSDMA0_CLK_CTRL, data); } } } @@ -2018,34 +2147,23 @@ bool enable) { uint32_t data, def; + int i; if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { - /* 1-not override: enable sdma0 mem light sleep */ - def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); - data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); - - /* 1-not override: enable sdma1 mem light sleep */ - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); - data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + for (i = 0; i < adev->sdma.num_instances; i++) { + /* 1-not override: enable sdma mem light sleep */ + def = data = RREG32_SDMA(i, mmSDMA0_POWER_CNTL); + data |= SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); + WREG32_SDMA(i, mmSDMA0_POWER_CNTL, data); } } else { - /* 0-override:disable sdma0 mem light sleep */ - def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); - data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; - if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); - - /* 0-override:disable sdma1 mem light sleep */ - if (adev->sdma.num_instances > 1) { - def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL)); - data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; + for (i = 0; i < adev->sdma.num_instances; i++) { + /* 0-override:disable sdma mem light sleep */ + def = data = RREG32_SDMA(i, mmSDMA0_POWER_CNTL); + data &= ~SDMA0_POWER_CNTL__MEM_POWER_OVERRIDE_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL), data); + WREG32_SDMA(i, mmSDMA0_POWER_CNTL, data); } } } @@ -2063,6 +2181,8 @@ case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: + case CHIP_RENOIR: sdma_v4_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); sdma_v4_0_update_medium_grain_light_sleep(adev, @@ -2133,7 +2253,43 @@ .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = sdma_v4_0_ring_get_rptr, + .get_wptr = sdma_v4_0_ring_get_wptr, + .set_wptr = sdma_v4_0_ring_set_wptr, + .emit_frame_size = + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ + 3 + /* hdp invalidate */ + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ + /* sdma_v4_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ + .emit_ib = sdma_v4_0_ring_emit_ib, + .emit_fence = sdma_v4_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, + .test_ring = sdma_v4_0_ring_test_ring, + .test_ib = sdma_v4_0_ring_test_ib, + .insert_nop = sdma_v4_0_ring_insert_nop, + .pad_ib = sdma_v4_0_ring_pad_ib, + .emit_wreg = sdma_v4_0_ring_emit_wreg, + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/* + * On Arcturus, SDMA instance 5~7 has a different vmhub type (AMDGPU_MMHUB_1). + * So create an individual constant ring_funcs for those instances.
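+ * The two tables below are identical except for the .vmhub field.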
+ */ +static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs_2nd_mmhub = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_ring_get_wptr, .set_wptr = sdma_v4_0_ring_set_wptr, @@ -2165,7 +2321,39 @@ .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = sdma_v4_0_ring_get_rptr, + .get_wptr = sdma_v4_0_page_ring_get_wptr, + .set_wptr = sdma_v4_0_page_ring_set_wptr, + .emit_frame_size = + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ + 3 + /* hdp invalidate */ + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ + /* sdma_v4_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ + .emit_ib = sdma_v4_0_ring_emit_ib, + .emit_fence = sdma_v4_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, + .test_ring = sdma_v4_0_ring_test_ring, + .test_ib = sdma_v4_0_ring_test_ib, + .insert_nop = sdma_v4_0_ring_insert_nop, + .pad_ib = sdma_v4_0_ring_pad_ib, + .emit_wreg = sdma_v4_0_ring_emit_wreg, + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs_2nd_mmhub = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_MMHUB_1, .get_rptr = sdma_v4_0_ring_get_rptr, .get_wptr = sdma_v4_0_page_ring_get_wptr, .set_wptr = sdma_v4_0_page_ring_set_wptr, @@ -2197,10 +2385,20 @@ int i; for (i = 0; i < adev->sdma.num_instances; i++) { - adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; + if (adev->asic_type == CHIP_ARCTURUS && i >= 5) + adev->sdma.instance[i].ring.funcs = + &sdma_v4_0_ring_funcs_2nd_mmhub; + else + adev->sdma.instance[i].ring.funcs = + &sdma_v4_0_ring_funcs; adev->sdma.instance[i].ring.me = i; if (adev->sdma.has_page_queue) { - adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs; + if (adev->asic_type == CHIP_ARCTURUS && i >= 5) + adev->sdma.instance[i].page.funcs = + &sdma_v4_0_page_ring_funcs_2nd_mmhub; + else + adev->sdma.instance[i].page.funcs = + &sdma_v4_0_page_ring_funcs; adev->sdma.instance[i].page.me = i; } } @@ -2224,10 +2422,23 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + switch (adev->sdma.num_instances) { + case 1: + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; + adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; + break; + case 8: + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + break; + case 2: + default: + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; + adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; + break; + } adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs; adev->sdma.illegal_inst_irq.funcs = &sdma_v4_0_illegal_inst_irq_funcs; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; adev->sdma.ecc_irq.funcs = &sdma_v4_0_ecc_irq_funcs; } @@ -2293,8 +2504,8 @@ static void 
sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) { adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; - if (adev->sdma.has_page_queue && adev->sdma.num_instances > 1) - adev->mman.buffer_funcs_ring = &adev->sdma.instance[1].page; + if (adev->sdma.has_page_queue) + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page; else adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } @@ -2313,22 +2524,15 @@ unsigned i; adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; - if (adev->sdma.has_page_queue && adev->sdma.num_instances > 1) { - for (i = 1; i < adev->sdma.num_instances; i++) { + for (i = 0; i < adev->sdma.num_instances; i++) { + if (adev->sdma.has_page_queue) sched = &adev->sdma.instance[i].page.sched; - adev->vm_manager.vm_pte_rqs[i - 1] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances - 1; - adev->vm_manager.page_fault = &adev->sdma.instance[0].page; - } else { - for (i = 0; i < adev->sdma.num_instances; i++) { + else sched = &adev->sdma.instance[i].ring.sched; - adev->vm_manager.vm_pte_rqs[i] = - &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - } - adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 2019-08-31 15:01:11.850736167 -0500 @@ -21,8 +21,11 @@ * */ +#include <linux/delay.h> #include <linux/firmware.h> -#include <drm/drmP.h> +#include <linux/module.h> +#include <linux/pci.h> + #include "amdgpu.h" #include "amdgpu_ucode.h" #include "amdgpu_trace.h" @@ -42,6 +45,12 @@ MODULE_FIRMWARE("amdgpu/navi10_sdma.bin"); MODULE_FIRMWARE("amdgpu/navi10_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navi14_sdma1.bin"); + +MODULE_FIRMWARE("amdgpu/navi12_sdma.bin"); +MODULE_FIRMWARE("amdgpu/navi12_sdma1.bin"); + #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 #define SDMA0_HYP_DEC_REG_END 0x5893 @@ -59,7 +68,7 @@ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), @@ -71,7 +80,7 @@ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), @@ -80,6 +89,18 @@ }; static const struct soc15_reg_golden golden_settings_sdma_nv10[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000), +}; + +static const struct soc15_reg_golden golden_settings_sdma_nv14[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), +}; + +static const struct soc15_reg_golden golden_settings_sdma_nv12[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), }; static u32 sdma_v5_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset) @@ -111,6 +132,22 @@ golden_settings_sdma_nv10, (const u32)ARRAY_SIZE(golden_settings_sdma_nv10)); break; + case CHIP_NAVI14: + soc15_program_register_sequence(adev, + golden_settings_sdma_5, + (const u32)ARRAY_SIZE(golden_settings_sdma_5)); + soc15_program_register_sequence(adev, + golden_settings_sdma_nv14, + (const u32)ARRAY_SIZE(golden_settings_sdma_nv14)); + break; + case CHIP_NAVI12: + soc15_program_register_sequence(adev, + golden_settings_sdma_5, + (const u32)ARRAY_SIZE(golden_settings_sdma_5)); + soc15_program_register_sequence(adev, + golden_settings_sdma_nv12, + (const u32)ARRAY_SIZE(golden_settings_sdma_nv12)); + break; default: break; } @@ -143,6 +180,12 @@ case CHIP_NAVI10: chip_name = "navi10"; break; + case CHIP_NAVI14: + chip_name = "navi14"; + break; + case CHIP_NAVI12: + chip_name = "navi12"; + break; default: BUG(); } @@ -861,7 +904,7 @@ if (amdgpu_emu_mode == 1) msleep(1); else - DRM_UDELAY(1); + udelay(1); } if (i < adev->usec_timeout) { @@ -1316,7 +1359,7 @@ if (ring->trail_seq == le32_to_cpu(*(ring->trail_fence_cpu_addr))) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) { @@ -1472,6 +1515,8 @@ switch (adev->asic_type) { case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: sdma_v5_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false); sdma_v5_0_update_medium_grain_light_sleep(adev, @@ -1532,7 +1577,7 @@ .align_mask = 0xf, .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB, + .vmhub = AMDGPU_GFXHUB_0, .get_rptr = sdma_v5_0_ring_get_rptr, .get_wptr = sdma_v5_0_ring_get_wptr, .set_wptr = sdma_v5_0_ring_set_wptr, @@ -1583,7 +1628,8 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE0 + + adev->sdma.num_instances; adev->sdma.trap_irq.funcs = &sdma_v5_0_trap_irq_funcs; adev->sdma.illegal_inst_irq.funcs = &sdma_v5_0_illegal_inst_irq_funcs; } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/si.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/si.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/si.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/si.c 2019-08-31 15:01:11.850736167 -0500 @@ -1186,6 +1186,12 @@ return 0; } +static enum amd_reset_method +si_asic_reset_method(struct amdgpu_device *adev) +{ + return AMD_RESET_METHOD_LEGACY; +} + static u32 si_get_config_memsize(struct amdgpu_device *adev) { return RREG32(mmCONFIG_MEMSIZE); @@ -1394,6 +1400,7 @@ .read_bios_from_rom = &si_read_bios_from_rom, .read_register = &si_read_register, .reset = &si_asic_reset, + .reset_method = &si_asic_reset_method, .set_vga_state = &si_vga_set_state, .get_xclk = &si_get_xclk, .set_uvd_clocks = &si_set_uvd_clocks, @@ -1881,7 +1888,7 @@ if (orig != data) si_pif_phy1_wreg(adev,PB1_PIF_PWRDOWN_1, data); - if ((adev->family != CHIP_OLAND) && (adev->family != CHIP_HAINAN)) { + if ((adev->asic_type != CHIP_OLAND) && (adev->asic_type != CHIP_HAINAN)) { orig = data = si_pif_phy0_rreg(adev,PB0_PIF_PWRDOWN_0); data &= ~PLL_RAMP_UP_TIME_0_MASK; if (orig != data) @@ -1930,14 +1937,14 @@ orig = data = si_pif_phy0_rreg(adev,PB0_PIF_CNTL); data &= ~LS2_EXIT_TIME_MASK; - if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN)) + if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) data |= LS2_EXIT_TIME(5); if (orig != data) si_pif_phy0_wreg(adev,PB0_PIF_CNTL, data); orig = data = si_pif_phy1_rreg(adev,PB1_PIF_CNTL); data &= ~LS2_EXIT_TIME_MASK; - if ((adev->family == CHIP_OLAND) || (adev->family == CHIP_HAINAN)) + if ((adev->asic_type == CHIP_OLAND) || (adev->asic_type == CHIP_HAINAN)) data |= LS2_EXIT_TIME(5); if (orig != data) si_pif_phy1_wreg(adev,PB1_PIF_CNTL, data); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c 2019-08-31 15:01:11.850736167 -0500 @@ -0,0 +1,724 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "smuio/smuio_11_0_0_offset.h" +#include "smuio/smuio_11_0_0_sh_mask.h" + +#include "smu_v11_0_i2c.h" +#include "amdgpu.h" +#include "soc15_common.h" +#include +#include +#include "amdgpu_amdkfd.h" +#include +#include +#include "amdgpu_ras.h" + +/* error codes */ +#define I2C_OK 0 +#define I2C_NAK_7B_ADDR_NOACK 1 +#define I2C_NAK_TXDATA_NOACK 2 +#define I2C_TIMEOUT 4 +#define I2C_SW_TIMEOUT 8 +#define I2C_ABORT 0x10 + +/* I2C transaction flags */ +#define I2C_NO_STOP 1 +#define I2C_RESTART 2 + +#define to_amdgpu_device(x) (container_of(x, struct amdgpu_ras, eeprom_control.eeprom_accessor))->adev +#define to_eeprom_control(x) container_of(x, struct amdgpu_ras_eeprom_control, eeprom_accessor) + +static void smu_v11_0_i2c_set_clock_gating(struct i2c_adapter *control, bool en) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t reg = RREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT); + + reg = REG_SET_FIELD(reg, SMUIO_PWRMGT, i2c_clk_gate_en, en ? 1 : 0); + WREG32_SOC15(SMUIO, 0, mmSMUIO_PWRMGT, reg); +} + + +static void smu_v11_0_i2c_enable(struct i2c_adapter *control, bool enable) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, enable ? 1 : 0); +} + +static void smu_v11_0_i2c_clear_status(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + /* do */ + { + RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_INTR); + + } /* while (reg_CKSVII2C_ic_clr_intr == 0) */ +} + +static void smu_v11_0_i2c_configure(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t reg = 0; + + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_SLAVE_DISABLE, 1); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_RESTART_EN, 1); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_MASTER, 0); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_10BITADDR_SLAVE, 0); + /* Standard mode */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MAX_SPEED_MODE, 2); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_CON, IC_MASTER_MODE, 1); + + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CON, reg); +} + +static void smu_v11_0_i2c_set_clock(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + /* + * Standard mode speed. These values are taken from SMUIO MAS, + * but are different from what is given in the + * Synopsys spec. The values here are based on assumption + * that refclock is 100MHz + * + * Configuration for standard mode; Speed = 100kbps + * Scale linearly, for now only support standard speed clock + * This will work only with 100M ref clock + * + * TBD: Change the calculation to take into account ref clock values also.
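+ * Note: in the DesignWare I2C programming model, the SS_SCL_HCNT/LCNT writes below set the SCL high/low phase in ic_clk ticks, and IC_SDA_HOLD sets the SDA hold time in ticks.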
+ */ + + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_FS_SPKLEN, 2); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_HCNT, 120); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SS_SCL_LCNT, 130); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_SDA_HOLD, 20); +} + +static void smu_v11_0_i2c_set_address(struct i2c_adapter *control, uint8_t address) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + /* Convert from an 8-bit to a 7-bit address */ + address >>= 1; + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TAR, (address & 0xFF)); +} + +static uint32_t smu_v11_0_i2c_poll_tx_status(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t ret = I2C_OK; + uint32_t reg, reg_c_tx_abrt_source; + + /* Check if transmission is completed */ + unsigned long timeout_counter = jiffies + msecs_to_jiffies(20); + + do { + if (time_after(jiffies, timeout_counter)) { + ret |= I2C_SW_TIMEOUT; + break; + } + + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + + } while (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFE) == 0); + + if (ret != I2C_OK) + return ret; + + /* This only checks if NAK is received and transaction got aborted */ + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_INTR_STAT); + + if (REG_GET_FIELD(reg, CKSVII2C_IC_INTR_STAT, R_TX_ABRT) == 1) { + reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE); + DRM_INFO("TX was terminated, IC_TX_ABRT_SOURCE val is:%x", reg_c_tx_abrt_source); + + /* Check for stop due to NACK */ + if (REG_GET_FIELD(reg_c_tx_abrt_source, + CKSVII2C_IC_TX_ABRT_SOURCE, + ABRT_TXDATA_NOACK) == 1) { + + ret |= I2C_NAK_TXDATA_NOACK; + + } else if (REG_GET_FIELD(reg_c_tx_abrt_source, + CKSVII2C_IC_TX_ABRT_SOURCE, + ABRT_7B_ADDR_NOACK) == 1) { + + ret |= I2C_NAK_7B_ADDR_NOACK; + } else { + ret |= I2C_ABORT; + } + + smu_v11_0_i2c_clear_status(control); + } + + return ret; +} + +static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t ret = I2C_OK; + uint32_t reg_ic_status, reg_c_tx_abrt_source; + + reg_c_tx_abrt_source = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_TX_ABRT_SOURCE); + + /* If slave is not present */ + if (REG_GET_FIELD(reg_c_tx_abrt_source, + CKSVII2C_IC_TX_ABRT_SOURCE, + ABRT_7B_ADDR_NOACK) == 1) { + ret |= I2C_NAK_7B_ADDR_NOACK; + + smu_v11_0_i2c_clear_status(control); + } else { /* wait till some data is there in RXFIFO */ + /* Poll for some byte in RXFIFO */ + unsigned long timeout_counter = jiffies + msecs_to_jiffies(20); + + do { + if (time_after(jiffies, timeout_counter)) { + ret |= I2C_SW_TIMEOUT; + break; + } + + reg_ic_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + + } while (REG_GET_FIELD(reg_ic_status, CKSVII2C_IC_STATUS, RFNE) == 0); + } + + return ret; +} + + + + +/** + * smu_v11_0_i2c_transmit - Send a block of data over the I2C bus to a slave device. + * + * @address: The I2C address of the slave device. + * @data: The data to transmit over the bus. + * @numbytes: The amount of data to transmit. + * @i2c_flag: Flags for transmission + * + * Returns 0 on success or error.
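+ * A non-zero return value is a bitmask of the I2C_* error codes defined at the top of this file.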
+ */ +static uint32_t smu_v11_0_i2c_transmit(struct i2c_adapter *control, + uint8_t address, uint8_t *data, + uint32_t numbytes, uint32_t i2c_flag) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t bytes_sent, reg, ret = 0; + unsigned long timeout_counter; + + bytes_sent = 0; + + DRM_DEBUG_DRIVER("I2C_Transmit(), address = %x, bytes = %d, data: ", + (uint16_t)address, numbytes); + + if (drm_debug & DRM_UT_DRIVER) { + print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, + 16, 1, data, numbytes, false); + } + + /* Set the I2C slave address */ + smu_v11_0_i2c_set_address(control, address); + /* Enable I2C */ + smu_v11_0_i2c_enable(control, true); + + /* Clear status bits */ + smu_v11_0_i2c_clear_status(control); + + + timeout_counter = jiffies + msecs_to_jiffies(20); + + while (numbytes > 0) { + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + if (REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)) { + do { + reg = 0; + /* + * Prepare transaction, no need to set RESTART. I2C engine will send + * START as soon as it sees data in TXFIFO + */ + if (bytes_sent == 0) + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART, + (i2c_flag & I2C_RESTART) ? 1 : 0); + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, data[bytes_sent]); + + /* determine if we need to send STOP bit or not */ + if (numbytes == 1) + /* Final transaction, so send stop unless I2C_NO_STOP */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP, + (i2c_flag & I2C_NO_STOP) ? 0 : 1); + /* Write */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 0); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); + + /* Record that the bytes were transmitted */ + bytes_sent++; + numbytes--; + + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_STATUS); + + } while (numbytes && REG_GET_FIELD(reg, CKSVII2C_IC_STATUS, TFNF)); + } + + /* + * We waited too long for the transmission FIFO to become not-full. + * Exit the loop with error. + */ + if (time_after(jiffies, timeout_counter)) { + ret |= I2C_SW_TIMEOUT; + goto Err; + } + } + + ret = smu_v11_0_i2c_poll_tx_status(control); + +Err: + /* Any error, no point in proceeding */ + if (ret != I2C_OK) { + if (ret & I2C_SW_TIMEOUT) + DRM_ERROR("TIMEOUT ERROR !!!"); + + if (ret & I2C_NAK_7B_ADDR_NOACK) + DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!"); + + + if (ret & I2C_NAK_TXDATA_NOACK) + DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!"); + } + + return ret; +} + + +/** + * smu_v11_0_i2c_receive - Receive a block of data over the I2C bus from a slave device. + * + * @address: The I2C address of the slave device. + * @numbytes: The amount of data to receive. + * @i2c_flag: Flags for transmission + * + * Returns 0 on success or error. + */ +static uint32_t smu_v11_0_i2c_receive(struct i2c_adapter *control, + uint8_t address, uint8_t *data, + uint32_t numbytes, uint8_t i2c_flag) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t bytes_received, ret = I2C_OK; + + bytes_received = 0; + + /* Set the I2C slave address */ + smu_v11_0_i2c_set_address(control, address); + + /* Enable I2C */ + smu_v11_0_i2c_enable(control, true); + + while (numbytes > 0) { + uint32_t reg = 0; + + smu_v11_0_i2c_clear_status(control); + + + /* Prepare transaction */ + + /* Each time we disable I2C, so this is not a restart */ + if (bytes_received == 0) + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, RESTART, + (i2c_flag & I2C_RESTART) ?
1 : 0); + + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT, 0); + /* Read */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, CMD, 1); + + /* Receiving the last byte */ + if (numbytes == 1) + /* Final transaction, so send stop if requested */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_DATA_CMD, STOP, + (i2c_flag & I2C_NO_STOP) ? 0 : 1); + + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD, reg); + + ret = smu_v11_0_i2c_poll_rx_status(control); + + /* Any error, no point in proceeding */ + if (ret != I2C_OK) { + if (ret & I2C_SW_TIMEOUT) + DRM_ERROR("TIMEOUT ERROR !!!"); + + if (ret & I2C_NAK_7B_ADDR_NOACK) + DRM_ERROR("Received I2C_NAK_7B_ADDR_NOACK !!!"); + + if (ret & I2C_NAK_TXDATA_NOACK) + DRM_ERROR("Received I2C_NAK_TXDATA_NOACK !!!"); + + break; + } + + reg = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_DATA_CMD); + data[bytes_received] = REG_GET_FIELD(reg, CKSVII2C_IC_DATA_CMD, DAT); + + /* Record that the bytes were received */ + bytes_received++; + numbytes--; + } + + DRM_DEBUG_DRIVER("I2C_Receive(), address = %x, bytes = %d, data:", + (uint16_t)address, bytes_received); + + if (drm_debug & DRM_UT_DRIVER) { + print_hex_dump(KERN_INFO, "data: ", DUMP_PREFIX_NONE, + 16, 1, data, bytes_received, false); + } + + return ret; +} + +static void smu_v11_0_i2c_abort(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t reg = 0; + + /* Enable I2C engine */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ENABLE, 1); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg); + + /* Abort previous transaction */ + reg = REG_SET_FIELD(reg, CKSVII2C_IC_ENABLE, ABORT, 1); + WREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE, reg); + + DRM_DEBUG_DRIVER("I2C_Abort() Done."); +} + + +static bool smu_v11_0_i2c_activity_done(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + const uint32_t IDLE_TIMEOUT = 1024; + uint32_t timeout_count = 0; + uint32_t reg_ic_enable, reg_ic_enable_status, reg_ic_clr_activity; + + reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); + reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); + + + if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) && + (REG_GET_FIELD(reg_ic_enable_status, CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) { + /* + * Nobody is using I2C engine, but engine remains active because + * someone forgot to send a STOP + */ + smu_v11_0_i2c_abort(control); + } else if (REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) { + /* Nobody is using I2C engine */ + return true; + } + + /* Keep reading activity bit until it's cleared */ + do { + reg_ic_clr_activity = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_CLR_ACTIVITY); + + if (REG_GET_FIELD(reg_ic_clr_activity, + CKSVII2C_IC_CLR_ACTIVITY, CLR_ACTIVITY) == 0) + return true; + + ++timeout_count; + + } while (timeout_count < IDLE_TIMEOUT); + + return false; +} + +static void smu_v11_0_i2c_init(struct i2c_adapter *control) +{ + /* Disable clock gating */ + smu_v11_0_i2c_set_clock_gating(control, false); + + if (!smu_v11_0_i2c_activity_done(control)) + DRM_WARN("I2C busy!"); + + /* Disable I2C */ + smu_v11_0_i2c_enable(control, false); + + /* Configure I2C to operate as master and in standard mode */ + smu_v11_0_i2c_configure(control); + + /* Initialize the clock to 50 kHz default */ + smu_v11_0_i2c_set_clock(control); + +} + +static void smu_v11_0_i2c_fini(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + uint32_t reg_ic_enable_status, reg_ic_enable; + 
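+ /* Request disable first; the engine may stay active if a STOP was never sent, which the check below catches and aborts. */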
smu_v11_0_i2c_enable(control, false); + + /* Double check if disabled, else force abort */ + reg_ic_enable_status = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE_STATUS); + reg_ic_enable = RREG32_SOC15(SMUIO, 0, mmCKSVII2C_IC_ENABLE); + + if ((REG_GET_FIELD(reg_ic_enable, CKSVII2C_IC_ENABLE, ENABLE) == 0) && + (REG_GET_FIELD(reg_ic_enable_status, + CKSVII2C_IC_ENABLE_STATUS, IC_EN) == 1)) { + /* + * Nobody is using I2C engine, but engine remains active because + * someone forgot to send a STOP + */ + smu_v11_0_i2c_abort(control); + } + + /* Restore clock gating */ + smu_v11_0_i2c_set_clock_gating(control, true); + +} + +static bool smu_v11_0_i2c_bus_lock(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + /* Send PPSMC_MSG_RequestI2CBus */ + if (!adev->powerplay.pp_funcs->smu_i2c_bus_access) + goto Fail; + + + if (!adev->powerplay.pp_funcs->smu_i2c_bus_access(adev->powerplay.pp_handle, true)) + return true; + +Fail: + return false; +} + +static bool smu_v11_0_i2c_bus_unlock(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + + if (!adev->powerplay.pp_funcs->smu_i2c_bus_access) + goto Fail; + + /* Send PPSMC_MSG_ReleaseI2CBus */ + if (!adev->powerplay.pp_funcs->smu_i2c_bus_access(adev->powerplay.pp_handle, + false)) + return true; + +Fail: + return false; +} + +/***************************** EEPROM I2C GLUE ****************************/ + +static uint32_t smu_v11_0_i2c_eeprom_read_data(struct i2c_adapter *control, + uint8_t address, + uint8_t *data, + uint32_t numbytes) +{ + uint32_t ret = 0; + + /* First 2 bytes are dummy write to set EEPROM address */ + ret = smu_v11_0_i2c_transmit(control, address, data, 2, I2C_NO_STOP); + if (ret != I2C_OK) + goto Fail; + + /* Now read data starting with that address */ + ret = smu_v11_0_i2c_receive(control, address, data + 2, numbytes - 2, + I2C_RESTART); + +Fail: + if (ret != I2C_OK) + DRM_ERROR("ReadData() - I2C error occurred: %x", ret); + + return ret; +} + +static uint32_t smu_v11_0_i2c_eeprom_write_data(struct i2c_adapter *control, + uint8_t address, + uint8_t *data, + uint32_t numbytes) +{ + uint32_t ret; + + ret = smu_v11_0_i2c_transmit(control, address, data, numbytes, 0); + + if (ret != I2C_OK) + DRM_ERROR("WriteI2CData() - I2C error occurred: %x", ret); + else + /* + * According to EEPROM spec there is a MAX of 10 ms required for + * EEPROM to flush internal RX buffer after STOP was issued at the + * end of write transaction. During this time the EEPROM will not be + * responsive to any more commands - so wait a bit more. + * + * TODO Improve to wait for first ACK for slave address after + * internal write cycle done.
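+ * (i.e. standard EEPROM ACK polling: keep re-addressing the device until it ACKs instead of sleeping a fixed 10 ms).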
+ */ + msleep(10); + + return ret; + +} + +static void lock_bus(struct i2c_adapter *i2c, unsigned int flags) +{ + struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c); + + if (!smu_v11_0_i2c_bus_lock(i2c)) { + DRM_ERROR("Failed to lock the bus from SMU"); + return; + } + + control->bus_locked = true; +} + +static int trylock_bus(struct i2c_adapter *i2c, unsigned int flags) +{ + WARN_ONCE(1, "This operation is not supposed to run in atomic context!"); + return false; +} + +static void unlock_bus(struct i2c_adapter *i2c, unsigned int flags) +{ + struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c); + + if (!smu_v11_0_i2c_bus_unlock(i2c)) { + DRM_ERROR("Failed to unlock the bus from SMU"); + return; + } + + control->bus_locked = false; +} + +static const struct i2c_lock_operations smu_v11_0_i2c_i2c_lock_ops = { + .lock_bus = lock_bus, + .trylock_bus = trylock_bus, + .unlock_bus = unlock_bus, +}; + +static int smu_v11_0_i2c_eeprom_i2c_xfer(struct i2c_adapter *i2c_adap, + struct i2c_msg *msgs, int num) +{ + int i, ret; + struct amdgpu_ras_eeprom_control *control = to_eeprom_control(i2c_adap); + + if (!control->bus_locked) { + DRM_ERROR("I2C bus unlocked, stopping transaction!"); + return -EIO; + } + + smu_v11_0_i2c_init(i2c_adap); + + for (i = 0; i < num; i++) { + if (msgs[i].flags & I2C_M_RD) + ret = smu_v11_0_i2c_eeprom_read_data(i2c_adap, + (uint8_t)msgs[i].addr, + msgs[i].buf, msgs[i].len); + else + ret = smu_v11_0_i2c_eeprom_write_data(i2c_adap, + (uint8_t)msgs[i].addr, + msgs[i].buf, msgs[i].len); + + if (ret != I2C_OK) { + num = -EIO; + break; + } + } + + smu_v11_0_i2c_fini(i2c_adap); + return num; +} + +static u32 smu_v11_0_i2c_eeprom_i2c_func(struct i2c_adapter *adap) +{ + return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL; +} + + +static const struct i2c_algorithm smu_v11_0_i2c_eeprom_i2c_algo = { + .master_xfer = smu_v11_0_i2c_eeprom_i2c_xfer, + .functionality = smu_v11_0_i2c_eeprom_i2c_func, +}; + +int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + int res; + + control->owner = THIS_MODULE; + control->class = I2C_CLASS_SPD; + control->dev.parent = &adev->pdev->dev; + control->algo = &smu_v11_0_i2c_eeprom_i2c_algo; + snprintf(control->name, sizeof(control->name), "RAS EEPROM"); + control->lock_ops = &smu_v11_0_i2c_i2c_lock_ops; + + res = i2c_add_adapter(control); + if (res) + DRM_ERROR("Failed to register hw i2c, err: %d\n", res); + + return res; +} + +void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control) +{ + i2c_del_adapter(control); +} + +/* + * Keep this for future unit tests if bugs arise + */ +#if 0 +#define I2C_TARGET_ADDR 0xA0 + +bool smu_v11_0_i2c_test_bus(struct i2c_adapter *control) +{ + + uint32_t ret = I2C_OK; + uint8_t data[6] = {0xf, 0, 0xde, 0xad, 0xbe, 0xef}; + + + DRM_INFO("Begin"); + + if (!smu_v11_0_i2c_bus_lock(control)) { + DRM_ERROR("Failed to lock the bus!"); + return false; + } + + smu_v11_0_i2c_init(control); + + /* Write 0xde 0xad 0xbe 0xef at EEPROM address 0x0f00 (first two bytes select the address) */ + ret = smu_v11_0_i2c_eeprom_write_data(control, I2C_TARGET_ADDR, data, 6); + + ret = smu_v11_0_i2c_eeprom_read_data(control, I2C_TARGET_ADDR, data, 6); + + smu_v11_0_i2c_fini(control); + + smu_v11_0_i2c_bus_unlock(control); + + + DRM_INFO("End"); + return true; +} +#endif diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h 1969-12-31 18:00:00.000000000
-0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.h 2019-08-31 15:01:11.851736168 -0500 @@ -0,0 +1,34 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef SMU_V11_I2C_CONTROL_H +#define SMU_V11_I2C_CONTROL_H + +#include + +struct i2c_adapter; + +int smu_v11_0_i2c_eeprom_control_init(struct i2c_adapter *control); +void smu_v11_0_i2c_eeprom_control_fini(struct i2c_adapter *control); + +#endif diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/soc15.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/soc15.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/soc15.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/soc15.c 2019-08-31 15:08:53.346776943 -0500 @@ -63,6 +63,8 @@ #include "uvd_v7_0.h" #include "vce_v4_0.h" #include "vcn_v1_0.h" +#include "vcn_v2_0.h" +#include "vcn_v2_5.h" #include "dce_virtual.h" #include "mxgpu_ai.h" #include "amdgpu_smu.h" @@ -115,6 +117,49 @@ spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); } +static u64 soc15_pcie_rreg64(struct amdgpu_device *adev, u32 reg) +{ + unsigned long flags, address, data; + u64 r; + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* read low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + r = RREG32(data); + + /* read high 32 bit*/ + WREG32(address, reg + 4); + (void)RREG32(address); + r |= ((u64)RREG32(data) << 32); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); + return r; +} + +static void soc15_pcie_wreg64(struct amdgpu_device *adev, u32 reg, u64 v) +{ + unsigned long flags, address, data; + + address = adev->nbio_funcs->get_pcie_index_offset(adev); + data = adev->nbio_funcs->get_pcie_data_offset(adev); + + spin_lock_irqsave(&adev->pcie_idx_lock, flags); + /* write low 32 bit */ + WREG32(address, reg); + (void)RREG32(address); + WREG32(data, (u32)(v & 0xffffffffULL)); + (void)RREG32(data); + + /* write high 32 bit */ + WREG32(address, reg + 4); + (void)RREG32(address); + WREG32(data, (u32)(v >> 32)); + (void)RREG32(data); + spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); +} + static u32 soc15_uvd_ctx_rreg(struct amdgpu_device *adev, u32 reg) { unsigned long flags, address, data; @@ -464,12 +509,23 @@ return 0; } -static int soc15_asic_reset(struct amdgpu_device *adev) +static int soc15_mode2_reset(struct amdgpu_device *adev) +{ + 
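/*
 * Aside: the index/data idiom behind soc15_pcie_rreg64()/soc15_pcie_wreg64()
 * above, restated generically (the helper and its parameters are
 * illustrative). Only 32 bits move per access, so the 64-bit value is
 * assembled from two dependent accesses that must stay under one spinlock,
 * or another CPU could retarget the index register between the halves. The
 * read-back of the index register after each write posts it before the data
 * register is touched.
 */
#include <linux/io.h>
#include <linux/spinlock.h>

static u64 indirect_rreg64_sketch(void __iomem *index, void __iomem *data,
				  u32 reg, spinlock_t *lock)
{
	unsigned long flags;
	u64 r;

	spin_lock_irqsave(lock, flags);
	writel(reg, index);
	readl(index);			/* post the index write */
	r = readl(data);		/* low 32 bits */
	writel(reg + 4, index);
	readl(index);
	r |= (u64)readl(data) << 32;	/* high 32 bits */
	spin_unlock_irqrestore(lock, flags);

	return r;
}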
if (!adev->powerplay.pp_funcs || + !adev->powerplay.pp_funcs->asic_reset_mode_2) + return -ENOENT; + + return adev->powerplay.pp_funcs->asic_reset_mode_2(adev->powerplay.pp_handle); +} + +static enum amd_reset_method +soc15_asic_reset_method(struct amdgpu_device *adev) { - int ret; bool baco_reset; switch (adev->asic_type) { + case CHIP_RAVEN: + return AMD_RESET_METHOD_MODE2; case CHIP_VEGA10: case CHIP_VEGA12: soc15_asic_get_baco_capability(adev, &baco_reset); @@ -493,11 +549,23 @@ } if (baco_reset) - ret = soc15_asic_baco_reset(adev); + return AMD_RESET_METHOD_BACO; else - ret = soc15_asic_mode1_reset(adev); + return AMD_RESET_METHOD_MODE1; +} - return ret; +static int soc15_asic_reset(struct amdgpu_device *adev) +{ + switch (soc15_asic_reset_method(adev)) { + case AMD_RESET_METHOD_BACO: + amdgpu_inc_vram_lost(adev); + return soc15_asic_baco_reset(adev); + case AMD_RESET_METHOD_MODE2: + return soc15_mode2_reset(adev); + default: + amdgpu_inc_vram_lost(adev); + return soc15_asic_mode1_reset(adev); + } } /*static int soc15_set_uvd_clock(struct amdgpu_device *adev, u32 clock, @@ -581,26 +649,31 @@ case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_RAVEN: + case CHIP_RENOIR: vega10_reg_base_init(adev); break; case CHIP_VEGA20: vega20_reg_base_init(adev); break; + case CHIP_ARCTURUS: + arct_reg_base_init(adev); + break; default: return -EINVAL; } - if (adev->asic_type == CHIP_VEGA20) + if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->gmc.xgmi.supported = true; if (adev->flags & AMD_IS_APU) adev->nbio_funcs = &nbio_v7_0_funcs; - else if (adev->asic_type == CHIP_VEGA20) + else if (adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS) adev->nbio_funcs = &nbio_v7_4_funcs; else adev->nbio_funcs = &nbio_v6_1_funcs; - if (adev->asic_type == CHIP_VEGA20) + if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->df_funcs = &df_v3_6_funcs; else adev->df_funcs = &df_v1_7_funcs; @@ -672,6 +745,37 @@ #endif amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block); break; + case CHIP_ARCTURUS: + amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); + amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); + amdgpu_device_ip_block_add(adev, &vcn_v2_5_ip_block); + break; + case CHIP_RENOIR: + amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); + if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) + amdgpu_device_ip_block_add(adev, &psp_v12_0_ip_block); + if (is_support_sw_smu(adev)) + amdgpu_device_ip_block_add(adev, &smu_v12_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); +#if defined(CONFIG_DRM_AMD_DC) + else if (amdgpu_device_has_dc_support(adev)) + amdgpu_device_ip_block_add(adev, &dm_ip_block); +#else +# warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." 
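/*
 * Aside: one way to read the reset plumbing introduced above (illustrative
 * helper, not in the patch). amdgpu_inc_vram_lost() is bumped for BACO and
 * mode-1 resets but not for mode 2, which suggests the mode-2 path -- now
 * the default for Raven APUs, whose "VRAM" is carved out of system memory --
 * is expected to preserve buffer contents across the reset.
 */
static bool soc15_reset_preserves_vram_sketch(struct amdgpu_device *adev)
{
	return soc15_asic_reset_method(adev) == AMD_RESET_METHOD_MODE2;
}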
+#endif + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + break; default: return -EINVAL; } @@ -688,7 +792,7 @@ struct amdgpu_ring *ring) { if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1); + WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1); else amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); @@ -714,14 +818,9 @@ /* Set the 2 events that we wish to watch, defined above */ /* Reg 40 is # received msgs */ + /* Reg 104 is # of posted requests sent */ perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40); - /* Pre-VG20, Reg 104 is # of posted requests sent. On VG20 it's 108 */ - if (adev->asic_type == CHIP_VEGA20) - perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, - EVENT1_SEL, 108); - else - perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, - EVENT1_SEL, 104); + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104); /* Write to enable desired perf counters */ WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr); @@ -751,6 +850,55 @@ *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32); } +static void vega20_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, + uint64_t *count1) +{ + uint32_t perfctr = 0; + uint64_t cnt0_of, cnt1_of; + int tmp; + + /* This reports 0 on APUs, so return to avoid writing/reading registers + * that may or may not be different from their GPU counterparts + */ + if (adev->flags & AMD_IS_APU) + return; + + /* Set the 2 events that we wish to watch, defined above */ + /* Reg 40 is # received msgs */ + /* Reg 108 is # of posted requests sent on VG20 */ + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3, + EVENT0_SEL, 40); + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK3, + EVENT1_SEL, 108); + + /* Write to enable desired perf counters */ + WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3, perfctr); + /* Zero out and enable the perf counters + * Write 0x5: + * Bit 0 = Start all counters(1) + * Bit 2 = Global counter reset enable(1) + */ + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005); + + msleep(1000); + + /* Load the shadow and disable the perf counters + * Write 0x2: + * Bit 0 = Stop counters(0) + * Bit 1 = Load the shadow counters(1) + */ + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002); + + /* Read register values to get any >32bit overflow */ + tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK3); + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER0_UPPER); + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK3, COUNTER1_UPPER); + + /* Get the values and add the overflow */ + *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK3) | (cnt0_of << 32); + *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK3) | (cnt1_of << 32); +} + static bool soc15_need_reset_on_init(struct amdgpu_device *adev) { u32 sol_reg; @@ -792,6 +940,7 @@ .read_bios_from_rom = &soc15_read_bios_from_rom, .read_register = &soc15_read_register, .reset = &soc15_asic_reset, + .reset_method = &soc15_asic_reset_method, .set_vga_state = &soc15_vga_set_state, .get_xclk = &soc15_get_xclk, .set_uvd_clocks = &soc15_set_uvd_clocks, @@ -821,9 +970,10 @@ .invalidate_hdp = &soc15_invalidate_hdp, .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &vega20_doorbell_index_init, - .get_pcie_usage = &soc15_get_pcie_usage, + .get_pcie_usage = &vega20_get_pcie_usage, .need_reset_on_init = &soc15_need_reset_on_init, .get_pcie_replay_count = &soc15_get_pcie_replay_count, + .reset_method = &soc15_asic_reset_method }; static int 
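/*
 * Aside: worked example of the 64-bit stitch done by the two PCIE perf-usage
 * helpers above (the values are invented). After the 1 s sampling window is
 * stopped with the shadow-load write (0x2), the hardware exposes bits [31:0]
 * of each event in the PERF_COUNTn register and any overflow above bit 31 in
 * the COUNTERn_UPPER fields of the CNTL register, so:
 */
static u64 pcie_perf_stitch_example(void)
{
	u32 lo32  = 0x0000b33f;	/* as read from smnPCIE_PERF_COUNT0_TXCLK3 */
	u64 upper = 0x2;	/* COUNTER0_UPPER field of CNTL_TXCLK3 */

	return lo32 | (upper << 32);	/* == 0x20000b33f received messages */
}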
soc15_common_early_init(void *handle) @@ -837,6 +987,8 @@ adev->smc_wreg = NULL; adev->pcie_rreg = &soc15_pcie_rreg; adev->pcie_wreg = &soc15_pcie_wreg; + adev->pcie_rreg64 = &soc15_pcie_rreg64; + adev->pcie_wreg64 = &soc15_pcie_wreg64; adev->uvd_ctx_rreg = &soc15_uvd_ctx_rreg; adev->uvd_ctx_wreg = &soc15_uvd_ctx_wreg; adev->didt_rreg = &soc15_didt_rreg; @@ -993,6 +1145,53 @@ adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; } break; + case CHIP_ARCTURUS: + adev->asic_funcs = &vega20_asic_funcs; + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS; + adev->pg_flags = 0; + adev->external_rev_id = adev->rev_id + 0x32; + break; + case CHIP_RENOIR: + adev->asic_funcs = &soc15_asic_funcs; + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_DF_MGCG; + adev->pg_flags = AMD_PG_SUPPORT_SDMA | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; + adev->external_rev_id = adev->rev_id + 0x91; + + if (adev->pm.pp_feature & PP_GFXOFF_MASK) + adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_RLC_SMU_HS; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -1038,21 +1237,18 @@ int i; struct amdgpu_ring *ring; - /* Two reasons to skip - * 1, Host driver already programmed them - * 2, To avoid registers program violations in SR-IOV - */ - if (!amdgpu_virt_support_skip_setting(adev)) { + /* sdma/ih doorbell range are programed by hypervisor */ + if (!amdgpu_sriov_vf(adev)) { for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index, adev->doorbell_index.sdma_doorbell_range); } - } - adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, + adev->nbio_funcs->ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index); + } } static int soc15_common_hw_init(void *handle) @@ -1129,7 +1325,8 @@ { uint32_t def, data; - if (adev->asic_type == CHIP_VEGA20) { + if (adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS) { def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL)); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) @@ -1248,6 +1445,7 @@ state == AMD_CG_STATE_GATE ? true : false); break; case CHIP_RAVEN: + case CHIP_RENOIR: adev->nbio_funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); adev->nbio_funcs->update_medium_grain_light_sleep(adev, @@ -1261,6 +1459,10 @@ soc15_update_rom_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; + case CHIP_ARCTURUS: + soc15_update_hdp_light_sleep(adev, + state == AMD_CG_STATE_GATE ? 
true : false); + break; default: break; } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/soc15_common.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/soc15_common.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/soc15_common.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/soc15_common.h 2019-08-31 15:01:11.851736168 -0500 @@ -69,9 +69,10 @@ } \ } while (0) +#define AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(a) (amdgpu_sriov_vf((a)) && !amdgpu_sriov_runtime((a))) #define WREG32_RLC(reg, value) \ do { \ - if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ + if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \ uint32_t i = 0; \ uint32_t retries = 50000; \ uint32_t r0 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0; \ @@ -96,7 +97,7 @@ #define WREG32_SOC15_RLC_SHADOW(ip, inst, reg, value) \ do { \ uint32_t target_reg = adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg;\ - if (amdgpu_virt_support_rlc_prg_reg(adev)) { \ + if (AMDGPU_VIRT_SUPPORT_RLC_PRG_REG(adev)) { \ uint32_t r2 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2; \ uint32_t r3 = adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3; \ uint32_t grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; \ diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/soc15.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/soc15.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/soc15.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/soc15.h 2019-08-31 15:01:11.851736168 -0500 @@ -77,6 +77,7 @@ int vega10_reg_base_init(struct amdgpu_device *adev); int vega20_reg_base_init(struct amdgpu_device *adev); +int arct_reg_base_init(struct amdgpu_device *adev); void vega10_doorbell_index_init(struct amdgpu_device *adev); void vega20_doorbell_index_init(struct amdgpu_device *adev); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c 2019-08-31 15:01:11.851736168 -0500 @@ -0,0 +1,255 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "umc_v6_1.h" +#include "amdgpu_ras.h" +#include "amdgpu.h" + +#include "rsmu/rsmu_0_0_2_offset.h" +#include "rsmu/rsmu_0_0_2_sh_mask.h" +#include "umc/umc_6_1_1_offset.h" +#include "umc/umc_6_1_1_sh_mask.h" + +#define smnMCA_UMC0_MCUMC_ADDRT0 0x50f10 + +/* + * (addr / 256) * 8192, the higher 26 bits in ErrorAddr + * is the index of 8KB block + */ +#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5) +/* channel index is the index of 256B block */ +#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8) +/* offset in 256B block */ +#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL) + +const uint32_t + umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = { + {2, 18, 11, 27}, {4, 20, 13, 29}, + {1, 17, 8, 24}, {7, 23, 14, 30}, + {10, 26, 3, 19}, {12, 28, 5, 21}, + {9, 25, 0, 16}, {15, 31, 6, 22} +}; + +static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev, + uint32_t umc_instance) +{ + uint32_t rsmu_umc_index; + + rsmu_umc_index = RREG32_SOC15(RSMU, 0, + mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); + rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN, 1); + rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_INSTANCE, umc_instance); + rsmu_umc_index = REG_SET_FIELD(rsmu_umc_index, + RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_WREN, 1 << umc_instance); + WREG32_SOC15(RSMU, 0, mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + rsmu_umc_index); +} + +static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) +{ + WREG32_FIELD15(RSMU, 0, RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, + RSMU_UMC_INDEX_MODE_EN, 0); +} + +static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + uint64_t mc_umc_status; + uint32_t mc_umc_status_addr; + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); + mc_umc_status_addr = + SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); + + /* select the lower chip and check the error count */ + ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 0); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - + UMC_V6_1_CE_CNT_INIT); + /* clear the lower chip err count */ + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + + /* select the higher chip and check the err counter */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 1); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + ecc_err_cnt = RREG32(ecc_err_cnt_addr + umc_reg_offset); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) - + UMC_V6_1_CE_CNT_INIT); + /* clear the higher chip err count */ + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + + /* check for SRAM correctable error + MCUMC_STATUS is a 64 bit register */ + mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset); + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 && + 
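/*
 * Aside: the "- UMC_V6_1_CE_CNT_INIT" above relies on the preload set up in
 * umc_v6_1_ras_init_per_channel() further down: EccErrCnt starts at
 * CE_CNT_MAX - CE_INT_THRESHOLD so the counter rolls over (and can raise the
 * CE interrupt) after THRESHOLD new errors, and software recovers the real
 * count by subtracting the preload. Worked example with an invented
 * threshold (the header added below uses 0xffff, making the preload 0):
 */
#define CE_CNT_MAX_EXAMPLE	0xffff
#define CE_THRESHOLD_EXAMPLE	16	/* invented, for illustration only */
#define CE_CNT_INIT_EXAMPLE	(CE_CNT_MAX_EXAMPLE - CE_THRESHOLD_EXAMPLE)	/* 0xffef */

static unsigned long ce_count_example(void)
{
	u32 hw_count = CE_CNT_INIT_EXAMPLE + 5;	/* 5 CEs since the preload */

	return hw_count - CE_CNT_INIT_EXAMPLE;	/* == 5, what lands in ce_count */
}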
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
+		*error_count += 1;
+}
+
+static void umc_v6_1_query_uncorrectable_error_count(struct amdgpu_device *adev,
+						      uint32_t umc_reg_offset,
+						      unsigned long *error_count)
+{
+	uint64_t mc_umc_status;
+	uint32_t mc_umc_status_addr;
+
+	mc_umc_status_addr =
+		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+
+	/* check the MCUMC_STATUS */
+	mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
+	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
+	     REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+	     REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
+	     REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
+	     REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
+		*error_count += 1;
+}
+
+static void umc_v6_1_query_error_count(struct amdgpu_device *adev,
+				       struct ras_err_data *err_data, uint32_t umc_reg_offset,
+				       uint32_t channel_index)
+{
+	umc_v6_1_query_correctable_error_count(adev, umc_reg_offset,
+					       &(err_data->ce_count));
+	umc_v6_1_query_uncorrectable_error_count(adev, umc_reg_offset,
+						 &(err_data->ue_count));
+}
+
+static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
+					   void *ras_error_status)
+{
+	amdgpu_umc_for_each_channel(umc_v6_1_query_error_count);
+}
+
+static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
+					 struct ras_err_data *err_data,
+					 uint32_t umc_reg_offset, uint32_t channel_index)
+{
+	uint32_t lsb, mc_umc_status_addr;
+	uint64_t mc_umc_status, err_addr;
+
+	mc_umc_status_addr =
+		SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
+
+	/* skip the error address processing if the err_addr buffer
+	 * allocation failed (-ENOMEM) */
+	if (!err_data->err_addr) {
+		/* clear umc status */
+		WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
+		return;
+	}
+
+	mc_umc_status = RREG64_UMC(mc_umc_status_addr + umc_reg_offset);
+
+	/* calculate error address if ue/ce error is detected */
+	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
+	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
+	     REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
+		err_addr = RREG64_PCIE(smnMCA_UMC0_MCUMC_ADDRT0 + umc_reg_offset * 4);
+
+		/* the lowest 'lsb' bits should be ignored */
+		lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB);
+		err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
+		err_addr &= ~((0x1ULL << lsb) - 1);
+
+		/* translate umc channel address to soc pa, 3 parts are included */
+		err_data->err_addr[err_data->err_addr_cnt] =
+			ADDR_OF_8KB_BLOCK(err_addr) |
+			ADDR_OF_256B_BLOCK(channel_index) |
+			OFFSET_IN_256B_BLOCK(err_addr);
+
+		err_data->err_addr_cnt++;
+	}
+
+	/* clear umc status */
+	WREG64_UMC(mc_umc_status_addr + umc_reg_offset, 0x0ULL);
+}
+
+static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
+					     void *ras_error_status)
+{
+	amdgpu_umc_for_each_channel(umc_v6_1_query_error_address);
+}
+
+static void umc_v6_1_ras_init_per_channel(struct amdgpu_device *adev,
+					  struct ras_err_data *err_data,
+					  uint32_t umc_reg_offset, uint32_t channel_index)
+{
+	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
+	uint32_t ecc_err_cnt_addr;
+
+	ecc_err_cnt_sel_addr =
+		SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
+	ecc_err_cnt_addr =
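/*
 * Aside: worked example (invented numbers) of the channel-address
 * translation above. The UMC reports a normalized per-channel address in
 * 256-byte units, and consecutive 256B units of one channel sit 8 KB apart
 * in the interleaved SoC physical address space, hence the three parts:
 */
static uint64_t umc_translate_example(void)
{
	uint64_t err_addr = 0x12345;	/* low 'lsb' bits already masked off */
	uint32_t channel  = 11;		/* from umc_v6_1_channel_idx_tbl */

	return ADDR_OF_8KB_BLOCK(err_addr) |	/* (0x12300 << 5) = 0x246000 */
	       ADDR_OF_256B_BLOCK(channel)  |	/* (11 << 8)      = 0x000b00 */
	       OFFSET_IN_256B_BLOCK(err_addr);	/* 0x45 -> total    0x246b45 */
}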
SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt); + + /* select the lower chip and check the error count */ + ecc_err_cnt_sel = RREG32(ecc_err_cnt_sel_addr + umc_reg_offset); + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 0); + /* set ce error interrupt type to APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrInt, 0x1); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); + + /* select the higher chip and check the err counter */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel, + EccErrCntCsSel, 1); + WREG32(ecc_err_cnt_sel_addr + umc_reg_offset, ecc_err_cnt_sel); + WREG32(ecc_err_cnt_addr + umc_reg_offset, UMC_V6_1_CE_CNT_INIT); +} + +static void umc_v6_1_ras_init(struct amdgpu_device *adev) +{ + void *ras_error_status = NULL; + + amdgpu_umc_for_each_channel(umc_v6_1_ras_init_per_channel); +} + +const struct amdgpu_umc_funcs umc_v6_1_funcs = { + .ras_init = umc_v6_1_ras_init, + .query_ras_error_count = umc_v6_1_query_ras_error_count, + .query_ras_error_address = umc_v6_1_query_ras_error_address, + .enable_umc_index_mode = umc_v6_1_enable_umc_index_mode, + .disable_umc_index_mode = umc_v6_1_disable_umc_index_mode, +}; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h 2019-08-31 15:01:11.851736168 -0500 @@ -0,0 +1,51 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ */
+#ifndef __UMC_V6_1_H__
+#define __UMC_V6_1_H__
+
+#include "soc15_common.h"
+#include "amdgpu.h"
+
+/* HBM Memory Channel Width */
+#define UMC_V6_1_HBM_MEMORY_CHANNEL_WIDTH	128
+/* number of umc channel instances with memory map register access */
+#define UMC_V6_1_CHANNEL_INSTANCE_NUM		4
+/* number of umc instances with memory map register access */
+#define UMC_V6_1_UMC_INSTANCE_NUM		8
+/* total channel instances in one umc block */
+#define UMC_V6_1_TOTAL_CHANNEL_NUM	(UMC_V6_1_CHANNEL_INSTANCE_NUM * UMC_V6_1_UMC_INSTANCE_NUM)
+/* UMC register per channel offset */
+#define UMC_V6_1_PER_CHANNEL_OFFSET	0x800
+
+/* EccErrCnt max value */
+#define UMC_V6_1_CE_CNT_MAX		0xffff
+/* umc ce interrupt threshold */
+#define UMC_V6_1_CE_INT_THRESHOLD	0xffff
+/* umc ce count initial value */
+#define UMC_V6_1_CE_CNT_INIT	(UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD)
+
+extern const struct amdgpu_umc_funcs umc_v6_1_funcs;
+extern const uint32_t
+	umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM];
+
+#endif
diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c	2019-08-25 14:01:23.000000000 -0500
+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c	2019-08-31 15:01:11.851736168 -0500
@@ -1763,7 +1763,7 @@
 	.align_mask = 0xf,
 	.support_64bit_ptrs = false,
 	.no_user_fence = true,
-	.vmhub = AMDGPU_MMHUB,
+	.vmhub = AMDGPU_MMHUB_0,
 	.get_rptr = uvd_v7_0_ring_get_rptr,
 	.get_wptr = uvd_v7_0_ring_get_wptr,
 	.set_wptr = uvd_v7_0_ring_set_wptr,
@@ -1796,7 +1796,7 @@
 	.nop = HEVC_ENC_CMD_NO_OP,
 	.support_64bit_ptrs = false,
 	.no_user_fence = true,
-	.vmhub = AMDGPU_MMHUB,
+	.vmhub = AMDGPU_MMHUB_0,
 	.get_rptr = uvd_v7_0_enc_ring_get_rptr,
 	.get_wptr = uvd_v7_0_enc_ring_get_wptr,
 	.set_wptr = uvd_v7_0_enc_ring_set_wptr,
diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c	2019-08-25 14:01:23.000000000 -0500
+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c	2019-08-31 15:01:11.851736168 -0500
@@ -1070,7 +1070,7 @@
 	.nop = VCE_CMD_NO_OP,
 	.support_64bit_ptrs = false,
 	.no_user_fence = true,
-	.vmhub = AMDGPU_MMHUB,
+	.vmhub = AMDGPU_MMHUB_0,
 	.get_rptr = vce_v4_0_ring_get_rptr,
 	.get_wptr = vce_v4_0_ring_get_wptr,
 	.set_wptr = vce_v4_0_ring_set_wptr,
diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c	2019-08-25 14:01:23.000000000 -0500
+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c	2019-08-31 15:01:11.851736168 -0500
@@ -63,6 +63,7 @@
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	adev->vcn.num_vcn_inst = 1;
 	adev->vcn.num_enc_rings = 2;
 	vcn_v1_0_set_dec_ring_funcs(adev);
@@ -87,20 +88,21 @@
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	/* VCN DEC TRAP */
-	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.irq);
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+			VCN_1_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst->irq);
 	if (r)
 		return r;
 	/* VCN ENC TRAP */
 	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
 		r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE,
-					&adev->vcn.irq);
+					&adev->vcn.inst->irq);
 		if (r)
 			return r;
 	}
 	/* VCN JPEG TRAP
*/ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.irq); + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 126, &adev->vcn.inst->irq); if (r) return r; @@ -122,39 +124,39 @@ if (r) return r; - ring = &adev->vcn.ring_dec; + ring = &adev->vcn.inst->ring_dec; sprintf(ring->name, "vcn_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; - adev->vcn.internal.scratch9 = adev->vcn.external.scratch9 = + adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); - adev->vcn.internal.data0 = adev->vcn.external.data0 = + adev->vcn.internal.data0 = adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); - adev->vcn.internal.data1 = adev->vcn.external.data1 = + adev->vcn.internal.data1 = adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); - adev->vcn.internal.cmd = adev->vcn.external.cmd = + adev->vcn.internal.cmd = adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); - adev->vcn.internal.nop = adev->vcn.external.nop = + adev->vcn.internal.nop = adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; sprintf(ring->name, "vcn_enc%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; } - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; sprintf(ring->name, "vcn_jpeg"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; - adev->vcn.internal.jpeg_pitch = adev->vcn.external.jpeg_pitch = + adev->vcn.internal.jpeg_pitch = adev->vcn.inst->external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); return 0; @@ -191,7 +193,7 @@ static int vcn_v1_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i, r; r = amdgpu_ring_test_helper(ring); @@ -199,14 +201,14 @@ goto done; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; ring->sched.ready = true; r = amdgpu_ring_test_helper(ring); if (r) goto done; } - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; r = amdgpu_ring_test_helper(ring); if (r) goto done; @@ -229,7 +231,7 @@ static int vcn_v1_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || RREG32_SOC15(VCN, 0, mmUVD_STATUS)) @@ -304,9 +306,9 @@ offset = 0; } else { WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr)); + lower_32_bits(adev->vcn.inst->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr)); + upper_32_bits(adev->vcn.inst->gpu_addr)); offset = size; WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); @@ -316,17 +318,17 @@ /* cache window 1: stack */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - 
lower_32_bits(adev->vcn.gpu_addr + offset)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); /* cache window 2: context */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); @@ -374,9 +376,9 @@ offset = 0; } else { WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); + lower_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); + upper_32_bits(adev->vcn.inst->gpu_addr), 0xFFFFFFFF, 0); offset = size; WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0); @@ -386,9 +388,9 @@ /* cache window 1: stack */ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); + lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); + upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0, 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE, @@ -396,10 +398,10 @@ /* cache window 2: context */ WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE, @@ -779,7 +781,7 @@ */ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; int i, j, r; @@ -932,21 +934,21 @@ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); - ring = &adev->vcn.ring_enc[0]; + ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); - ring = &adev->vcn.ring_enc[1]; + ring = &adev->vcn.inst->ring_enc[1]; 
WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); @@ -968,7 +970,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; @@ -1106,7 +1108,7 @@ ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); /* initialize JPEG wptr */ - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); /* copy patch commands to the jpeg ring */ @@ -1255,21 +1257,21 @@ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); /* Restore */ - ring = &adev->vcn.ring_enc[0]; + ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - ring = &adev->vcn.ring_enc[1]; + ring = &adev->vcn.inst->ring_enc[1]; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - ring = &adev->vcn.ring_dec; + ring = &adev->vcn.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, @@ -1315,7 +1317,7 @@ UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); /* Restore */ - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | @@ -1329,7 +1331,7 @@ WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); - ring = &adev->vcn.ring_dec; + ring = &adev->vcn.inst->ring_dec; WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, @@ -1596,7 +1598,7 @@ { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); else return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); @@ -1613,7 +1615,7 @@ { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); else return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); @@ -1630,7 +1632,7 @@ { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); else @@ -2114,16 +2116,16 @@ switch (entry->src_id) { case 124: - amdgpu_fence_process(&adev->vcn.ring_dec); + amdgpu_fence_process(&adev->vcn.inst->ring_dec); 
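/*
 * Aside: the shape of the vcn.inst refactor being applied throughout this
 * file (sketch; field names come from the patch, the loop itself is
 * illustrative). Ring and IRQ state moves from struct amdgpu_vcn into a
 * per-instance array so parts with more than one VCN engine (the VCN 2.5
 * code added elsewhere in this patch) can share these helpers; VCN 1.0 and
 * 2.0 set num_vcn_inst = 1 and only ever touch inst[0] through the
 * adev->vcn.inst pointer.
 */
static void vcn_inst_walk_sketch(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		struct amdgpu_ring *dec = &adev->vcn.inst[i].ring_dec;

		dec->sched.ready = false;	/* per-instance teardown, as in hw_fini above */
	}
}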
break; case 119: - amdgpu_fence_process(&adev->vcn.ring_enc[0]); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]); break; case 120: - amdgpu_fence_process(&adev->vcn.ring_enc[1]); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]); break; case 126: - amdgpu_fence_process(&adev->vcn.ring_jpeg); + amdgpu_fence_process(&adev->vcn.inst->ring_jpeg); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -2198,7 +2200,7 @@ .align_mask = 0xf, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v1_0_dec_ring_get_rptr, .get_wptr = vcn_v1_0_dec_ring_get_wptr, .set_wptr = vcn_v1_0_dec_ring_set_wptr, @@ -2232,7 +2234,7 @@ .nop = VCN_ENC_CMD_NO_OP, .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v1_0_enc_ring_get_rptr, .get_wptr = vcn_v1_0_enc_ring_get_wptr, .set_wptr = vcn_v1_0_enc_ring_set_wptr, @@ -2264,7 +2266,7 @@ .nop = PACKET0(0x81ff, 0), .support_64bit_ptrs = false, .no_user_fence = true, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .extra_dw = 64, .get_rptr = vcn_v1_0_jpeg_ring_get_rptr, .get_wptr = vcn_v1_0_jpeg_ring_get_wptr, @@ -2295,7 +2297,7 @@ static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; + adev->vcn.inst->ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs; DRM_INFO("VCN decode is enabled in VM mode\n"); } @@ -2304,14 +2306,14 @@ int i; for (i = 0; i < adev->vcn.num_enc_rings; ++i) - adev->vcn.ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; + adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; DRM_INFO("VCN encode is enabled in VM mode\n"); } static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; + adev->vcn.inst->ring_jpeg.funcs = &vcn_v1_0_jpeg_ring_vm_funcs; DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); } @@ -2322,8 +2324,8 @@ static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2; - adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs; + adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2; + adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs; } const struct amdgpu_ip_block_version vcn_v1_0_ip_block = diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c 2019-08-31 15:08:26.826774600 -0500 @@ -22,7 +22,7 @@ */ #include -#include + #include "amdgpu.h" #include "amdgpu_vcn.h" #include "soc15.h" @@ -92,6 +92,7 @@ { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->vcn.num_vcn_inst = 1; adev->vcn.num_enc_rings = 2; vcn_v2_0_set_dec_ring_funcs(adev); @@ -118,7 +119,7 @@ /* VCN DEC TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, - &adev->vcn.irq); + &adev->vcn.inst->irq); if (r) return r; @@ -126,15 +127,14 @@ for (i = 0; i < adev->vcn.num_enc_rings; ++i) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, - &adev->vcn.irq); + &adev->vcn.inst->irq); if (r) return r; } /* VCN JPEG TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_2_0__SRCID__JPEG_DECODE, - &adev->vcn.irq); + VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst->irq); if (r) return r; @@ 
-156,49 +156,56 @@ if (r) return r; - ring = &adev->vcn.ring_dec; + ring = &adev->vcn.inst->ring_dec; ring->use_doorbell = true; ring->doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 << 1; sprintf(ring->name, "vcn_dec"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; + adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; + adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; + adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; + adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; - adev->vcn.external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); + adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; - adev->vcn.external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; - adev->vcn.external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; - adev->vcn.external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; - adev->vcn.external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); + adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i; sprintf(ring->name, "vcn_enc%d", i); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; } - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; ring->use_doorbell = true; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1; sprintf(ring->name, "vcn_jpeg"); - r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst->irq, 0); if (r) return r; adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode; adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; - adev->vcn.external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); + adev->vcn.inst->external.jpeg_pitch = SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH); return 0; } @@ -234,11 +241,11 @@ static int vcn_v2_0_hw_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i, r; adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell, - ring->doorbell_index); + ring->doorbell_index, 0); ring->sched.ready = true; r = amdgpu_ring_test_ring(ring); @@ -248,7 +255,7 @@ } for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; ring->sched.ready = true; r = 
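/*
 * Aside: doorbell layout implied by the vcn_v2_0 sw_init assignments above
 * (illustrative helper; the << 1 appears to convert the table's 64-bit
 * doorbell slot index into 32-bit doorbell units -- an inference from the
 * code, not stated in the patch):
 *
 *	base + 0     -> vcn_dec
 *	base + 1     -> vcn_jpeg
 *	base + 2 + i -> vcn_enc[i], i = 0..1
 */
static u32 vcn2_enc_doorbell_example(struct amdgpu_device *adev, int i)
{
	u32 base = adev->doorbell_index.vcn.vcn_ring0_1 << 1;

	return base + 2 + i;	/* enc rings follow dec (base) and jpeg (base + 1) */
}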
amdgpu_ring_test_ring(ring); if (r) { @@ -257,7 +264,7 @@ } } - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; ring->sched.ready = true; r = amdgpu_ring_test_ring(ring); if (r) { @@ -283,7 +290,7 @@ static int vcn_v2_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; int i; if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || @@ -294,11 +301,11 @@ ring->sched.ready = false; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { - ring = &adev->vcn.ring_enc[i]; + ring = &adev->vcn.inst->ring_enc[i]; ring->sched.ready = false; } - ring = &adev->vcn.ring_jpeg; + ring = &adev->vcn.inst->ring_jpeg; ring->sched.ready = false; return 0; @@ -368,9 +375,9 @@ offset = 0; } else { WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr)); + lower_32_bits(adev->vcn.inst->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr)); + upper_32_bits(adev->vcn.inst->gpu_addr)); offset = size; WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); @@ -380,17 +387,17 @@ /* cache window 1: stack */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); /* cache window 2: context */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); @@ -426,10 +433,10 @@ } else { WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.gpu_addr), 0, indirect); + lower_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.gpu_addr), 0, indirect); + upper_32_bits(adev->vcn.inst->gpu_addr), 0, indirect); offset = size; WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), @@ -447,10 +454,10 @@ if (!indirect) { WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.gpu_addr + offset), 0, indirect); + lower_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.gpu_addr + offset), 0, indirect); + upper_32_bits(adev->vcn.inst->gpu_addr + offset), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); } else { @@ -467,10 +474,10 @@ /* cache window 2: context */ WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, 
mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + lower_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); + upper_32_bits(adev->vcn.inst->gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect); WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); WREG32_SOC15_DPG_MODE_2_0(SOC15_DPG_MODE_OFFSET_2_0( @@ -658,7 +665,7 @@ */ static int jpeg_v2_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.ring_jpeg; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_jpeg; uint32_t tmp; int r = 0; @@ -920,7 +927,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; vcn_v2_0_enable_static_power_gating(adev); @@ -1046,7 +1053,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = &adev->vcn.ring_dec; + struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; int i, j, r; @@ -1197,14 +1204,14 @@ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); - ring = &adev->vcn.ring_enc[0]; + ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); - ring = &adev->vcn.ring_enc[1]; + ring = &adev->vcn.inst->ring_enc[1]; WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); @@ -1351,14 +1358,14 @@ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); /* Restore */ - ring = &adev->vcn.ring_enc[0]; + ring = &adev->vcn.inst->ring_enc[0]; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - ring = &adev->vcn.ring_enc[1]; + ring = &adev->vcn.inst->ring_enc[1]; WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); @@ -1480,11 +1487,13 @@ * * Write a start command to the ring. */ -static void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) +void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); } @@ -1495,9 +1504,11 @@ * * Write a end command to the ring. 
*/ -static void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) +void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) { - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1)); } @@ -1508,14 +1519,15 @@ * * Write a nop command to the ring. */ -static void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { + struct amdgpu_device *adev = ring->adev; int i; WARN_ON(ring->wptr % 2 || count % 2); for (i = 0; i < count / 2; i++) { - amdgpu_ring_write(ring, PACKET0(mmUVD_NO_OP_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0)); amdgpu_ring_write(ring, 0); } } @@ -1528,30 +1540,31 @@ * * Write a fence and a trap command to the ring. */ -static void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, - unsigned flags) +void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) { - WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + struct amdgpu_device *adev = ring->adev; - amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID_INTERNAL_OFFSET, 0)); + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0)); amdgpu_ring_write(ring, seq); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, addr & 0xffffffff); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1)); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1)); } @@ -1564,44 +1577,46 @@ * * Write ring commands to execute the indirect buffer */ -static void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_job *job, - struct amdgpu_ib *ib, - uint32_t flags) +void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) { + struct amdgpu_device *adev = ring->adev; unsigned vmid = AMDGPU_JOB_GET_VMID(job); - amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0)); amdgpu_ring_write(ring, vmid); - amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET, 0)); + 
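/*
 * Aside: the pattern behind the conversions above, restated as a sketch of
 * vcn_v2_0_dec_ring_emit_wreg() with the cached offsets. The dec-ring emit
 * helpers lose their 'static' and stop hard-coding mmUVD_*_INTERNAL_OFFSET
 * constants; sw_init caches the packet register offsets in
 * adev->vcn.internal instead, so the same emit functions can plausibly be
 * reused by other VCN variants (such as the vcn_v2_5 code added elsewhere
 * in this patch) whose internal offsets may differ.
 */
void dec_ring_wreg_sketch(struct amdgpu_ring *ring, u32 reg, u32 val)
{
	struct amdgpu_device *adev = ring->adev;

	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0));
	amdgpu_ring_write(ring, reg << 2);
	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0));
	amdgpu_ring_write(ring, val);
	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0));
	amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1));
}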
amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0)); amdgpu_ring_write(ring, ib->length_dw); } -static void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val, - uint32_t mask) +void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) { - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, reg << 2); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); amdgpu_ring_write(ring, val); - amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0)); amdgpu_ring_write(ring, mask); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1)); } -static void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) +void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; uint32_t data0, data1, mask; @@ -1615,16 +1630,18 @@ vcn_v2_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); } -static void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val) +void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val) { - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET, 0)); + struct amdgpu_device *adev = ring->adev; + + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); amdgpu_ring_write(ring, reg << 2); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); amdgpu_ring_write(ring, val); - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1)); } @@ -1640,7 +1657,7 @@ { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) + if (ring == &adev->vcn.inst->ring_enc[0]) return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); else return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); @@ -1657,7 +1674,7 @@ { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) { + if (ring == &adev->vcn.inst->ring_enc[0]) { if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; else @@ -1681,7 +1698,7 @@ { struct amdgpu_device *adev = ring->adev; - if (ring == &adev->vcn.ring_enc[0]) { + if (ring == &adev->vcn.inst->ring_enc[0]) { if (ring->use_doorbell) { adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); @@ -1706,8 +1723,8 @@ * * Write enc a fence and a trap command to the ring. 
*/ -static void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, - u64 seq, unsigned flags) +void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags) { WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); @@ -1718,7 +1735,7 @@ amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP); } -static void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) +void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, VCN_ENC_CMD_END); } @@ -1731,10 +1748,10 @@ * * Write enc ring commands to execute the indirect buffer */ -static void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_job *job, - struct amdgpu_ib *ib, - uint32_t flags) +void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); @@ -1745,9 +1762,8 @@ amdgpu_ring_write(ring, ib->length_dw); } -static void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val, - uint32_t mask) +void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) { amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT); amdgpu_ring_write(ring, reg << 2); @@ -1755,8 +1771,8 @@ amdgpu_ring_write(ring, val); } -static void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vmid, uint64_t pd_addr) +void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; @@ -1767,8 +1783,7 @@ lower_32_bits(pd_addr), 0xffffffff); } -static void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val) +void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE); amdgpu_ring_write(ring, reg << 2); @@ -1832,7 +1847,7 @@ * * Write a start command to the ring. */ -static void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) +void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1850,7 +1865,7 @@ * * Write a end command to the ring. */ -static void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) +void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring) { amdgpu_ring_write(ring, PACKETJ(mmUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1869,8 +1884,8 @@ * * Write a fence and a trap command to the ring. */ -static void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, - unsigned flags) +void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags) { WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); @@ -1918,10 +1933,10 @@ * * Write ring commands to execute the indirect buffer. 
*/ -static void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_job *job, - struct amdgpu_ib *ib, - uint32_t flags) +void vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) { unsigned vmid = AMDGPU_JOB_GET_VMID(job); @@ -1969,9 +1984,8 @@ amdgpu_ring_write(ring, 0x2); } -static void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val, - uint32_t mask) +void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) { uint32_t reg_offset = (reg << 2); @@ -1997,8 +2011,8 @@ amdgpu_ring_write(ring, mask); } -static void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) +void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; uint32_t data0, data1, mask; @@ -2012,8 +2026,7 @@ vcn_v2_0_jpeg_ring_emit_reg_wait(ring, data0, data1, mask); } -static void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, - uint32_t reg, uint32_t val) +void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) { uint32_t reg_offset = (reg << 2); @@ -2031,7 +2044,7 @@ amdgpu_ring_write(ring, val); } -static void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) +void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count) { int i; @@ -2059,16 +2072,16 @@ switch (entry->src_id) { case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT: - amdgpu_fence_process(&adev->vcn.ring_dec); + amdgpu_fence_process(&adev->vcn.inst->ring_dec); break; case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE: - amdgpu_fence_process(&adev->vcn.ring_enc[0]); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[0]); break; case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY: - amdgpu_fence_process(&adev->vcn.ring_enc[1]); + amdgpu_fence_process(&adev->vcn.inst->ring_enc[1]); break; case VCN_2_0__SRCID__JPEG_DECODE: - amdgpu_fence_process(&adev->vcn.ring_jpeg); + amdgpu_fence_process(&adev->vcn.inst->ring_jpeg); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", @@ -2086,20 +2099,20 @@ unsigned i; int r; - WREG32(adev->vcn.external.scratch9, 0xCAFEDEAD); + WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 4); if (r) return r; - amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(adev->vcn.external.scratch9); + tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9); if (tmp == 0xDEADBEEF) break; - DRM_UDELAY(1); + udelay(1); } if (i >= adev->usec_timeout) @@ -2158,7 +2171,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_0_dec_ring_get_rptr, .get_wptr = vcn_v2_0_dec_ring_get_wptr, .set_wptr = vcn_v2_0_dec_ring_set_wptr, @@ -2189,7 +2202,7 @@ .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_0_enc_ring_get_rptr, .get_wptr = vcn_v2_0_enc_ring_get_wptr, 
.set_wptr = vcn_v2_0_enc_ring_set_wptr, @@ -2218,7 +2231,7 @@ static const struct amdgpu_ring_funcs vcn_v2_0_jpeg_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_JPEG, .align_mask = 0xf, - .vmhub = AMDGPU_MMHUB, + .vmhub = AMDGPU_MMHUB_0, .get_rptr = vcn_v2_0_jpeg_ring_get_rptr, .get_wptr = vcn_v2_0_jpeg_ring_get_wptr, .set_wptr = vcn_v2_0_jpeg_ring_set_wptr, @@ -2247,7 +2260,7 @@ static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs; + adev->vcn.inst->ring_dec.funcs = &vcn_v2_0_dec_ring_vm_funcs; DRM_INFO("VCN decode is enabled in VM mode\n"); } @@ -2256,14 +2269,14 @@ int i; for (i = 0; i < adev->vcn.num_enc_rings; ++i) - adev->vcn.ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs; + adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs; DRM_INFO("VCN encode is enabled in VM mode\n"); } static void vcn_v2_0_set_jpeg_ring_funcs(struct amdgpu_device *adev) { - adev->vcn.ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs; + adev->vcn.inst->ring_jpeg.funcs = &vcn_v2_0_jpeg_ring_vm_funcs; DRM_INFO("VCN jpeg decode is enabled in VM mode\n"); } @@ -2274,8 +2287,8 @@ static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2; - adev->vcn.irq.funcs = &vcn_v2_0_irq_funcs; + adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2; + adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs; } const struct amdgpu_ip_block_version vcn_v2_0_ip_block = diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.h 2019-08-31 15:01:11.851736168 -0500 @@ -24,6 +24,44 @@ #ifndef __VCN_V2_0_H__ #define __VCN_V2_0_H__ +extern void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring); +extern void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); +extern void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags); +extern void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); +extern void vcn_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr); +extern void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, + uint32_t reg, uint32_t val); + +extern void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, + u64 seq, unsigned flags); +extern void vcn_v2_0_enc_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); +extern void vcn_v2_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr); +extern void vcn_v2_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); + +extern void vcn_v2_0_jpeg_ring_insert_start(struct amdgpu_ring *ring); +extern void vcn_v2_0_jpeg_ring_insert_end(struct amdgpu_ring *ring); +extern void vcn_v2_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, + unsigned flags); +extern void 
vcn_v2_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, uint32_t flags); +extern void vcn_v2_0_jpeg_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); +extern void vcn_v2_0_jpeg_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned vmid, uint64_t pd_addr); +extern void vcn_v2_0_jpeg_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); +extern void vcn_v2_0_jpeg_ring_nop(struct amdgpu_ring *ring, uint32_t count); + extern const struct amdgpu_ip_block_version vcn_v2_0_ip_block; #endif /* __VCN_V2_0_H__ */ diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c 2019-08-31 15:01:11.852736168 -0500 @@ -0,0 +1,1414 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ */
+
+#include <linux/firmware.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "soc15.h"
+#include "soc15d.h"
+#include "vcn_v2_0.h"
+
+#include "vcn/vcn_2_5_offset.h"
+#include "vcn/vcn_2_5_sh_mask.h"
+#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
+
+#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27
+#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f
+#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10
+#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x11
+#define mmUVD_NO_OP_INTERNAL_OFFSET 0x29
+#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x66
+#define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d
+
+#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4
+#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5
+#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c
+
+#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+
+#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2
+
+static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
+static int vcn_v2_5_set_powergating_state(void *handle,
+				enum amd_powergating_state state);
+
+static int amdgpu_ih_clientid_vcns[] = {
+	SOC15_IH_CLIENTID_VCN,
+	SOC15_IH_CLIENTID_VCN1
+};
+
+/**
+ * vcn_v2_5_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int vcn_v2_5_early_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (adev->asic_type == CHIP_ARCTURUS) {
+		u32 harvest;
+		int i;
+
+		adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS;
+		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
+			harvest = RREG32_SOC15(UVD, i, mmCC_UVD_HARVESTING);
+			if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
+				adev->vcn.harvest_config |= 1 << i;
+		}
+
+		if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
+					AMDGPU_VCN_HARVEST_VCN1))
+			/* both instances are harvested, disable the block */
+			return -ENOENT;
+	} else
+		adev->vcn.num_vcn_inst = 1;
+
+	adev->vcn.num_enc_rings = 2;
+
+	vcn_v2_5_set_dec_ring_funcs(adev);
+	vcn_v2_5_set_enc_ring_funcs(adev);
+	vcn_v2_5_set_jpeg_ring_funcs(adev);
+	vcn_v2_5_set_irq_funcs(adev);
+
+	return 0;
+}
+
+/**
+ * vcn_v2_5_sw_init - sw init for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v2_5_sw_init(void *handle)
+{
+	struct amdgpu_ring *ring;
+	int i, j, r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
+		if (adev->vcn.harvest_config & (1 << j))
+			continue;
+		/* VCN DEC TRAP */
+		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
+			VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq);
+		if (r)
+			return r;
+
+		/* VCN ENC TRAP */
+		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
+				i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq);
+			if (r)
+				return r;
+		}
+
+		/* VCN JPEG TRAP */
+		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
+			VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[j].irq);
+		if (r)
+			return r;
+	}
+
+	r = amdgpu_vcn_sw_init(adev);
+	if (r)
+		return r;
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		const struct common_firmware_header *hdr;
+		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+
adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + + if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) { + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1; + adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + } + DRM_INFO("PSP loading VCN firmware\n"); + } + + r = amdgpu_vcn_resume(adev); + if (r) + return r; + + for (j = 0; j < adev->vcn.num_vcn_inst; j++) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; + adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; + adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; + adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; + adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + + adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; + adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(UVD, j, mmUVD_SCRATCH9); + adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; + adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA0); + adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; + adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_DATA1); + adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; + adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(UVD, j, mmUVD_GPCOM_VCPU_CMD); + adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; + adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP); + + adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; + adev->vcn.inst[j].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, j, mmUVD_JPEG_PITCH); + + ring = &adev->vcn.inst[j].ring_dec; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8*j; + sprintf(ring->name, "vcn_dec_%d", j); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + ring = &adev->vcn.inst[j].ring_enc[i]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i + 8*j; + sprintf(ring->name, "vcn_enc_%d.%d", j, i); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; + } + + ring = &adev->vcn.inst[j].ring_jpeg; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8*j; + sprintf(ring->name, "vcn_jpeg_%d", j); + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0); + if (r) + return r; + } + + return 0; +} + +/** + * vcn_v2_5_sw_fini - sw fini for VCN block + * + * @handle: amdgpu_device pointer + * + * VCN suspend and free up sw allocation + */ +static int vcn_v2_5_sw_fini(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = amdgpu_vcn_suspend(adev); + if (r) + return r; + + r = amdgpu_vcn_sw_fini(adev); + + return r; +} + +/** + * vcn_v2_5_hw_init - start and test VCN block + * + * @handle: amdgpu_device pointer + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v2_5_hw_init(void 
*handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_ring *ring;
+	int i, j, r;
+
+	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
+		if (adev->vcn.harvest_config & (1 << j))
+			continue;
+		ring = &adev->vcn.inst[j].ring_dec;
+
+		adev->nbio_funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+			ring->doorbell_index, j);
+
+		r = amdgpu_ring_test_ring(ring);
+		if (r) {
+			ring->sched.ready = false;
+			goto done;
+		}
+
+		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+			ring = &adev->vcn.inst[j].ring_enc[i];
+			ring->sched.ready = false;
+			/* the enc ring test is skipped for now */
+			continue;
+			r = amdgpu_ring_test_ring(ring);
+			if (r) {
+				ring->sched.ready = false;
+				goto done;
+			}
+		}
+
+		ring = &adev->vcn.inst[j].ring_jpeg;
+		r = amdgpu_ring_test_ring(ring);
+		if (r) {
+			ring->sched.ready = false;
+			goto done;
+		}
+	}
+done:
+	if (!r)
+		DRM_INFO("VCN decode and encode initialized successfully.\n");
+
+	return r;
+}
+
+/**
+ * vcn_v2_5_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the VCN block, mark the rings as not ready any more
+ */
+static int vcn_v2_5_hw_fini(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_ring *ring;
+	int i, j;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+		ring = &adev->vcn.inst[i].ring_dec;
+
+		if (RREG32_SOC15(VCN, i, mmUVD_STATUS))
+			vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
+
+		ring->sched.ready = false;
+
+		/* use a separate index for the enc rings so the instance
+		 * loop above is not cut short
+		 */
+		for (j = 0; j < adev->vcn.num_enc_rings; ++j) {
+			ring = &adev->vcn.inst[i].ring_enc[j];
+			ring->sched.ready = false;
+		}
+
+		ring = &adev->vcn.inst[i].ring_jpeg;
+		ring->sched.ready = false;
+	}
+
+	return 0;
+}
+
+/**
+ * vcn_v2_5_suspend - suspend VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v2_5_suspend(void *handle)
+{
+	int r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	r = vcn_v2_5_hw_fini(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_vcn_suspend(adev);
+
+	return r;
+}
+
+/**
+ * vcn_v2_5_resume - resume VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v2_5_resume(void *handle)
+{
+	int r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	r = amdgpu_vcn_resume(adev);
+	if (r)
+		return r;
+
+	r = vcn_v2_5_hw_init(adev);
+
+	return r;
+}
+
+/**
+ * vcn_v2_5_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Let the VCN memory controller know its offsets
+ */
+static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
+{
+	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+	uint32_t offset;
+	int i;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+		/* cache window 0: fw */
+		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
+			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
+			WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
+			offset = 0;
+		} else {
+			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+				lower_32_bits(adev->vcn.inst[i].gpu_addr));
+			WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+				upper_32_bits(adev->vcn.inst[i].gpu_addr));
+			offset = size;
+			WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0,
+
AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + } +} + +/** + * vcn_v2_5_disable_clock_gating - disable VCN clock gating + * + * @adev: amdgpu_device pointer + * @sw: enable SW clock gating + * + * Disable clock gating for VCN block + */ +static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data; + int ret = 0; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* UVD disable CGC */ + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE); + data &= ~(UVD_CGC_GATE__SYS_MASK + | UVD_CGC_GATE__UDEC_MASK + | UVD_CGC_GATE__MPEG2_MASK + | UVD_CGC_GATE__REGS_MASK + | UVD_CGC_GATE__RBC_MASK + | UVD_CGC_GATE__LMI_MC_MASK + | UVD_CGC_GATE__LMI_UMC_MASK + | UVD_CGC_GATE__IDCT_MASK + | UVD_CGC_GATE__MPRD_MASK + | UVD_CGC_GATE__MPC_MASK + | UVD_CGC_GATE__LBSI_MASK + | UVD_CGC_GATE__LRBBM_MASK + | UVD_CGC_GATE__UDEC_RE_MASK + | UVD_CGC_GATE__UDEC_CM_MASK + | UVD_CGC_GATE__UDEC_IT_MASK + | UVD_CGC_GATE__UDEC_DB_MASK + | UVD_CGC_GATE__UDEC_MP_MASK + | UVD_CGC_GATE__WCB_MASK + | UVD_CGC_GATE__VCPU_MASK + | UVD_CGC_GATE__MMSCH_MASK); + + WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data); + + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, ret); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + /* turn on */ + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE); + data |= (UVD_SUVD_CGC_GATE__SRE_MASK + | UVD_SUVD_CGC_GATE__SIT_MASK + | UVD_SUVD_CGC_GATE__SMP_MASK + | UVD_SUVD_CGC_GATE__SCM_MASK + | UVD_SUVD_CGC_GATE__SDB_MASK + | UVD_SUVD_CGC_GATE__SRE_H264_MASK + | 
UVD_SUVD_CGC_GATE__SRE_HEVC_MASK + | UVD_SUVD_CGC_GATE__SIT_H264_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCM_H264_MASK + | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK + | UVD_SUVD_CGC_GATE__SDB_H264_MASK + | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCLR_MASK + | UVD_SUVD_CGC_GATE__UVD_SC_MASK + | UVD_SUVD_CGC_GATE__ENT_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK + | UVD_SUVD_CGC_GATE__SITE_MASK + | UVD_SUVD_CGC_GATE__SRE_VP9_MASK + | UVD_SUVD_CGC_GATE__SCM_VP9_MASK + | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK + | UVD_SUVD_CGC_GATE__SDB_VP9_MASK + | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); + data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); + } +} + +/** + * vcn_v2_5_enable_clock_gating - enable VCN clock gating + * + * @adev: amdgpu_device pointer + * @sw: enable SW clock gating + * + * Enable clock gating for VCN block + */ +static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* enable UVD CGC */ + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); + data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); + } +} + +/** + * jpeg_v2_5_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v2_5_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint32_t tmp; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << 
i)) + continue; + ring = &adev->vcn.inst[i].ring_jpeg; + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* JPEG disable CGC */ + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); + tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); + + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); + tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK + | JPEG_CGC_GATE__JPEG2_DEC_MASK + | JPEG_CGC_GATE__JMCIF_MASK + | JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); + + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL); + tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK + | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK + | JPEG_CGC_CTRL__JMCIF_MODE_MASK + | JPEG_CGC_CTRL__JRBBM_MODE_MASK); + WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp); + + /* MJPEG global tiling registers */ + WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR); + } + + return 0; +} + +/** + * jpeg_v2_5_stop - stop JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v2_5_stop(struct amdgpu_device *adev) +{ + uint32_t tmp; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE); + tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK + |JPEG_CGC_GATE__JPEG2_DEC_MASK + |JPEG_CGC_GATE__JMCIF_MASK + |JPEG_CGC_GATE__JRBBM_MASK); + WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + } + + return 0; +} + +static int vcn_v2_5_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + uint32_t rb_bufsz, tmp; + int i, j, k, r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* set uvd status busy */ + tmp = RREG32_SOC15(UVD, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(UVD, i, mmUVD_STATUS, tmp); + } + + /*SW clock gating */ + vcn_v2_5_disable_clock_gating(adev); 
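+	/* with clock gating disabled, bring up the VCPU of each enabled
+	 * instance: enable the VCPU clock, mask the master interrupt and
+	 * program the LMI and MPC registers before the cache windows are
+	 * set up by vcn_v2_5_mc_resume() below
+	 */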
+ + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* setup mmUVD_LMI_CTRL */ + tmp = RREG32_SOC15(UVD, i, mmUVD_LMI_CTRL); + tmp &= ~0xff; + WREG32_SOC15(UVD, i, mmUVD_LMI_CTRL, tmp | 0x8| + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup mmUVD_MPC_CNTL */ + tmp = RREG32_SOC15(UVD, i, mmUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup mmUVD_MPC_SET_MUX */ + WREG32_SOC15(UVD, i, mmUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + } + + vcn_v2_5_mc_resume(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* VCN global tiling registers */ + WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, i, mmUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (k = 0; k < 10; ++k) { + uint32_t status; + + for (j = 0; j < 100; ++j) { + status = RREG32_SOC15(UVD, i, mmUVD_STATUS); + if (status & 2) + break; + if (amdgpu_emu_mode == 1) + msleep(500); + else + mdelay(10); + } + r = 0; + if (status & 2) + break; + + DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + + if (r) { + DRM_ERROR("VCN decode not responding, giving up!!!\n"); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_VMID, 0); + + ring = &adev->vcn.inst[i].ring_dec; + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + 
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_CNTL, tmp); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR, 0); + + ring->wptr = RREG32_SOC15(UVD, i, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, i, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(UVD, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_RB_SIZE, ring->ring_size / 4); + + ring = &adev->vcn.inst[i].ring_enc[1]; + WREG32_SOC15(UVD, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4); + } + r = jpeg_v2_5_start(adev); + + return r; +} + +static int vcn_v2_5_stop(struct amdgpu_device *adev) +{ + uint32_t tmp; + int i, r; + + r = jpeg_v2_5_stop(adev); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + /* wait for vcn idle */ + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; + + /* block LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp); + + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* clear status */ + WREG32_SOC15(VCN, i, mmUVD_STATUS, 0); + + vcn_v2_5_enable_clock_gating(adev); + + /* enable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + } + + return 0; +} + +/** + * vcn_v2_5_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, ring->me, 
mmUVD_RBC_RB_RPTR); +} + +/** + * vcn_v2_5_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR); +} + +/** + * vcn_v2_5_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_DEC, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_1, + .get_rptr = vcn_v2_5_dec_ring_get_rptr, + .get_wptr = vcn_v2_5_dec_ring_get_wptr, + .set_wptr = vcn_v2_5_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */ + 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */ + 6, + .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */ + .emit_ib = vcn_v2_0_dec_ring_emit_ib, + .emit_fence = vcn_v2_0_dec_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_dec_ring_test_ring, + .test_ib = amdgpu_vcn_dec_ring_test_ib, + .insert_nop = vcn_v2_0_dec_ring_insert_nop, + .insert_start = vcn_v2_0_dec_ring_insert_start, + .insert_end = vcn_v2_0_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v2_5_enc_ring_get_rptr - get enc read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc read pointer + */ +static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR); + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_RPTR2); +} + +/** + * vcn_v2_5_enc_ring_get_wptr - get enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware enc write pointer + */ +static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR); + } else { + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2); + } +} + +/** + * vcn_v2_5_enc_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) { + if (ring->use_doorbell) { + 
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + } + } else { + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + } + } +} + +static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .vmhub = AMDGPU_MMHUB_1, + .get_rptr = vcn_v2_5_enc_ring_get_rptr, + .get_wptr = vcn_v2_5_enc_ring_get_wptr, + .set_wptr = vcn_v2_5_enc_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_enc_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v2_5_jpeg_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t vcn_v2_5_jpeg_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_RPTR); +} + +/** + * vcn_v2_5_jpeg_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t vcn_v2_5_jpeg_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + else + return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR); +} + +/** + * vcn_v2_5_jpeg_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void vcn_v2_5_jpeg_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v2_5_jpeg_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .vmhub = AMDGPU_MMHUB_1, + .get_rptr = vcn_v2_5_jpeg_ring_get_rptr, + .get_wptr = vcn_v2_5_jpeg_ring_get_wptr, + .set_wptr = vcn_v2_5_jpeg_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */ + 18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */ + .emit_ib = vcn_v2_0_jpeg_ring_emit_ib, + .emit_fence = 
vcn_v2_0_jpeg_ring_emit_fence, + .emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_jpeg_ring_test_ring, + .test_ib = amdgpu_vcn_jpeg_ring_test_ib, + .insert_nop = vcn_v2_0_jpeg_ring_nop, + .insert_start = vcn_v2_0_jpeg_ring_insert_start, + .insert_end = vcn_v2_0_jpeg_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg, + .emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs; + adev->vcn.inst[i].ring_dec.me = i; + DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i); + } +} + +static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) +{ + int i, j; + + for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { + if (adev->vcn.harvest_config & (1 << j)) + continue; + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; + adev->vcn.inst[j].ring_enc[i].me = j; + } + DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j); + } +} + +static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + adev->vcn.inst[i].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs; + adev->vcn.inst[i].ring_jpeg.me = i; + DRM_INFO("VCN(%d) jpeg decode is enabled in VM mode\n", i); + } +} + +static bool vcn_v2_5_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE); + } + + return ret; +} + +static int vcn_v2_5_wait_for_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE, ret); + if (ret) + return ret; + } + + return ret; +} + +static int vcn_v2_5_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false;
+
+	if (enable) {
+		/* clock gating is only safe once the block is idle */
+		if (!vcn_v2_5_is_idle(handle))
+			return -EBUSY;
+		vcn_v2_5_enable_clock_gating(adev);
+	} else {
+		vcn_v2_5_disable_clock_gating(adev);
+	}
+
+	return 0;
+}
+
+static int vcn_v2_5_set_powergating_state(void *handle,
+				enum amd_powergating_state state)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int ret;
+
+	if (state == adev->vcn.cur_state)
+		return 0;
+
+	if (state == AMD_PG_STATE_GATE)
+		ret = vcn_v2_5_stop(adev);
+	else
+		ret = vcn_v2_5_start(adev);
+
+	if (!ret)
+		adev->vcn.cur_state = state;
+
+	return ret;
+}
+
+static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					unsigned type,
+					enum amdgpu_interrupt_state state)
+{
+	return 0;
+}
+
+static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
+				      struct amdgpu_irq_src *source,
+				      struct amdgpu_iv_entry *entry)
+{
+	uint32_t ip_instance;
+
+	switch (entry->client_id) {
+	case SOC15_IH_CLIENTID_VCN:
+		ip_instance = 0;
+		break;
+	case SOC15_IH_CLIENTID_VCN1:
+		ip_instance = 1;
+		break;
+	default:
+		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
+		return 0;
+	}
+
+	DRM_DEBUG("IH: VCN TRAP\n");
+
+	switch (entry->src_id) {
+	case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
+		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
+		break;
+	case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
+		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
+		break;
+	case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
+		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
+		break;
+	case VCN_2_0__SRCID__JPEG_DECODE:
+		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_jpeg);
+		break;
+	default:
+		DRM_ERROR("Unhandled interrupt: %d %d\n",
+			  entry->src_id, entry->src_data[0]);
+		break;
+	}
+
+	return 0;
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = {
+	.set = vcn_v2_5_set_interrupt_state,
+	.process = vcn_v2_5_process_interrupt,
+};
+
+static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
+{
+	int i;
+
+	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+		if (adev->vcn.harvest_config & (1 << i))
+			continue;
+		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 2;
+		adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
+	}
+}
+
+static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
+	.name = "vcn_v2_5",
+	.early_init = vcn_v2_5_early_init,
+	.late_init = NULL,
+	.sw_init = vcn_v2_5_sw_init,
+	.sw_fini = vcn_v2_5_sw_fini,
+	.hw_init = vcn_v2_5_hw_init,
+	.hw_fini = vcn_v2_5_hw_fini,
+	.suspend = vcn_v2_5_suspend,
+	.resume = vcn_v2_5_resume,
+	.is_idle = vcn_v2_5_is_idle,
+	.wait_for_idle = vcn_v2_5_wait_for_idle,
+	.check_soft_reset = NULL,
+	.pre_soft_reset = NULL,
+	.soft_reset = NULL,
+	.post_soft_reset = NULL,
+	.set_clockgating_state = vcn_v2_5_set_clockgating_state,
+	.set_powergating_state = vcn_v2_5_set_powergating_state,
+};
+
+const struct amdgpu_ip_block_version vcn_v2_5_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_VCN,
+	.major = 2,
+	.minor = 5,
+	.rev = 0,
+	.funcs = &vcn_v2_5_ip_funcs,
+};
diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h
--- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h	1969-12-31 18:00:00.000000000 -0600
+++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.h	2019-08-31 15:01:11.852736168 -0500
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2019 Advanced Micro Devices, Inc.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_V2_5_H__ +#define __VCN_V2_5_H__ + +extern const struct amdgpu_ip_block_version vcn_v2_5_ip_block; + +#endif /* __VCN_V2_5_H__ */ diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vega10_ih.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vega10_ih.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vega10_ih.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vega10_ih.c 2019-08-31 15:01:11.852736168 -0500 @@ -50,7 +50,7 @@ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); return; @@ -64,7 +64,7 @@ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 1); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); @@ -80,7 +80,7 @@ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_ENABLE, 1); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); @@ -106,7 +106,7 @@ ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); return; @@ -125,7 +125,7 @@ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, RB_ENABLE, 0); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); @@ -145,7 +145,7 @@ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, RB_ENABLE, 0); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if 
(amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); @@ -219,7 +219,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) { struct amdgpu_ih_ring *ih; - u32 ih_rb_cntl; + u32 ih_rb_cntl, ih_chicken; int ret = 0; u32 tmp; @@ -234,11 +234,17 @@ WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff); ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); + if (adev->irq.ih.use_bus_addr) { + ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); + } else { + ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_FBPA_ENABLE, 1); + } ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); return -ETIMEDOUT; @@ -247,6 +253,11 @@ WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); } + if ((adev->asic_type == CHIP_ARCTURUS + && adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) + || adev->asic_type == CHIP_RENOIR) + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); + /* set the writeback address whether it's enabled or not */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr)); @@ -272,7 +283,7 @@ WPTR_OVERFLOW_ENABLE, 0); ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); @@ -299,7 +310,7 @@ ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); - if (amdgpu_virt_support_psp_prg_ih_reg(adev)) { + if (amdgpu_sriov_vf(adev)) { if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, ih_rb_cntl)) { DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c 2019-08-31 15:01:11.852736168 -0500 @@ -81,6 +81,10 @@ adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3; adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5; adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7; + adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_DOORBELL64_VCN0_1; + adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_DOORBELL64_VCN2_3; + adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_DOORBELL64_VCN4_5; + adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_DOORBELL64_VCN6_7; adev->doorbell_index.first_non_cp = AMDGPU_DOORBELL64_FIRST_NON_CP; adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c 2019-08-31 15:01:11.852736168 -0500 @@ -50,6 +50,8 @@ adev->reg_offset[NBIF_HWIP][i] = (uint32_t 
*)(&(NBIO_BASE.instance[i])); adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); + adev->reg_offset[UMC_HWIP][i] = (uint32_t *)(&(UMC_BASE.instance[i])); + adev->reg_offset[RSMU_HWIP][i] = (uint32_t *)(&(RSMU_BASE.instance[i])); } return 0; } @@ -85,6 +87,10 @@ adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3; adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5; adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7; + adev->doorbell_index.vcn.vcn_ring0_1 = AMDGPU_VEGA20_DOORBELL64_VCN0_1; + adev->doorbell_index.vcn.vcn_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCN2_3; + adev->doorbell_index.vcn.vcn_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCN4_5; + adev->doorbell_index.vcn.vcn_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCN6_7; adev->doorbell_index.first_non_cp = AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP; adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vi.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vi.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdgpu/vi.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdgpu/vi.c 2019-08-31 15:01:11.852736168 -0500 @@ -711,6 +711,12 @@ return r; } +static enum amd_reset_method +vi_asic_reset_method(struct amdgpu_device *adev) +{ + return AMD_RESET_METHOD_LEGACY; +} + static u32 vi_get_config_memsize(struct amdgpu_device *adev) { return RREG32(mmCONFIG_MEMSIZE); @@ -1023,6 +1029,7 @@ .read_bios_from_rom = &vi_read_bios_from_rom, .read_register = &vi_read_register, .reset = &vi_asic_reset, + .reset_method = &vi_asic_reset_method, .set_vga_state = &vi_vga_set_state, .get_xclk = &vi_get_xclk, .set_uvd_clocks = &vi_set_uvd_clocks, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm 2019-08-31 15:01:11.853736168 -0500 @@ -20,1105 +20,947 @@ * OTHER DEALINGS IN THE SOFTWARE. 
*/ +var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 +var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 +var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 +var SQ_WAVE_STATUS_HALT_MASK = 0x2000 -shader main - -asic(DEFAULT) - -type(CS) +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 4 +var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24 +var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4 +var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11 +var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1 + +var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 +var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF +var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 +var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 +var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 +var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 + +var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 +var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 +var SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT = 25 +var SQ_WAVE_IB_STS_REPLAY_W64H_SIZE = 1 +var SQ_WAVE_IB_STS_REPLAY_W64H_MASK = 0x02000000 +var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE = 1 +var SQ_WAVE_IB_STS_RCNT_SIZE = 6 +var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x003F8000 +var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF + +var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 +var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 + +// bits [31:24] unused by SPI debug data +var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31 +var TTMP11_SAVE_REPLAY_W64H_MASK = 0x80000000 +var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 24 +var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0x7F000000 + +// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] +// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE +var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 +var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC + +var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 +var S_SAVE_SPI_INIT_ATC_SHIFT = 27 +var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 +var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 +var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 +var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_SAVE_PC_HI_RCNT_SHIFT = 26 +var S_SAVE_PC_HI_RCNT_MASK = 0xFC000000 +var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 25 +var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x02000000 +var S_SAVE_PC_HI_REPLAY_W64H_SHIFT = 24 +var S_SAVE_PC_HI_REPLAY_W64H_MASK = 0x01000000 + +var s_sgpr_save_num = 108 + +var s_save_spi_init_lo = exec_lo +var s_save_spi_init_hi = exec_hi +var s_save_pc_lo = ttmp0 +var s_save_pc_hi = ttmp1 +var s_save_exec_lo = ttmp2 +var s_save_exec_hi = ttmp3 +var s_save_status = ttmp12 +var s_save_trapsts = ttmp5 +var s_save_xnack_mask = ttmp6 +var s_wave_size = ttmp7 +var s_save_buf_rsrc0 = ttmp8 +var s_save_buf_rsrc1 = ttmp9 +var s_save_buf_rsrc2 = ttmp10 +var s_save_buf_rsrc3 = ttmp11 +var s_save_mem_offset = ttmp14 +var s_save_alloc_size = s_save_trapsts +var s_save_tmp = s_save_buf_rsrc2 +var s_save_m0 = ttmp15 + +var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE +var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC + +var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 +var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 +var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 +var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 +var 
S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 +var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 +var S_WAVE_SIZE = 25 + +var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT +var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK +var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT +var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK + +var s_restore_spi_init_lo = exec_lo +var s_restore_spi_init_hi = exec_hi +var s_restore_mem_offset = ttmp12 +var s_restore_alloc_size = ttmp3 +var s_restore_tmp = ttmp6 +var s_restore_mem_offset_save = s_restore_tmp +var s_restore_m0 = s_restore_alloc_size +var s_restore_mode = ttmp7 +var s_restore_flat_scratch = ttmp2 +var s_restore_pc_lo = ttmp0 +var s_restore_pc_hi = ttmp1 +var s_restore_exec_lo = ttmp14 +var s_restore_exec_hi = ttmp15 +var s_restore_status = ttmp4 +var s_restore_trapsts = ttmp5 +var s_restore_xnack_mask = ttmp13 +var s_restore_buf_rsrc0 = ttmp8 +var s_restore_buf_rsrc1 = ttmp9 +var s_restore_buf_rsrc2 = ttmp10 +var s_restore_buf_rsrc3 = ttmp11 +var s_restore_size = ttmp7 -wave_size(32) -/*************************************************************************/ -/* control on how to run the shader */ -/*************************************************************************/ -//any hack that needs to be made to run this code in EMU (either becasue various EMU code are not ready or no compute save & restore in EMU run) -var EMU_RUN_HACK = 0 -var EMU_RUN_HACK_RESTORE_NORMAL = 0 -var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 -var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 -var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK -var SAVE_LDS = 0 -var WG_BASE_ADDR_LO = 0x9000a000 -var WG_BASE_ADDR_HI = 0x0 -var WAVE_SPACE = 0x9000 //memory size that each wave occupies in workgroup state mem, increase from 5000 to 9000 for more SGPR need to be saved -var CTX_SAVE_CONTROL = 0x0 -var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL -var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either becasue various RTL code are not ready or no compute save & restore in RTL run) -var SGPR_SAVE_USE_SQC = 0 //use SQC D$ to do the write -var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //need to change BUF_DATA_FORMAT in S_SAVE_BUF_RSRC_WORD3_MISC from 0 to BUF_DATA_FORMAT_32 if set to 1 (i.e. 
0x00827FAC) -var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing -var SAVE_RESTORE_HWID_DDID = 0 -var RESTORE_DDID_IN_SGPR18 = 0 -/**************************************************************************/ -/* variables */ -/**************************************************************************/ -var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 -var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 -var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 +shader main + asic(DEFAULT) + type(CS) + wave_size(32) -var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 -var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 -var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 -var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 -var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 -var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 4 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits -var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24 -var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4 -var SQ_WAVE_IB_STS2_WAVE64_SHIFT = 11 -var SQ_WAVE_IB_STS2_WAVE64_SIZE = 1 - -var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 -var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask -var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 -var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 -var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 -var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF -var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 -var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 -var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 -var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 -var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 - -var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME -var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME -var SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE = 1 //FIXME -var SQ_WAVE_IB_STS_RCNT_SIZE = 6 //FIXME -var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME - -var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 -var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 - - -/* Save */ -var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes -var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE - -var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit -var S_SAVE_SPI_INIT_ATC_SHIFT = 27 -var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype -var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 -var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG -var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 - -var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used -var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME - -var s_save_spi_init_lo = exec_lo -var s_save_spi_init_hi = exec_hi - -var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3¡¯h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} -var s_save_pc_hi = ttmp1 -var s_save_exec_lo = ttmp2 -var s_save_exec_hi = ttmp3 -var s_save_status = ttmp4 -var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine -var s_wave_size = ttmp6 //ttmp6 is not needed now, since it's only 32bit xnack mask, now use it to determine wave32 or wave64 in EMU_HACK -var s_save_xnack_mask = ttmp7 -var s_save_buf_rsrc0 = ttmp8 -var s_save_buf_rsrc1 = ttmp9 -var s_save_buf_rsrc2 = ttmp10 -var s_save_buf_rsrc3 = ttmp11 - -var s_save_mem_offset = ttmp14 -var s_sgpr_save_num = 106 //in gfx10, all sgpr must be saved -var s_save_alloc_size = s_save_trapsts //conflict -var s_save_tmp = s_save_buf_rsrc2 //shared 
with s_save_buf_rsrc2 (conflict: should not use mem access with s_save_tmp at the same time) -var s_save_m0 = ttmp15 - -/* Restore */ -var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE -var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC - -var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit -var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 -var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype -var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 -var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG -var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 - -var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT -var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK -var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT -var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK - -var s_restore_spi_init_lo = exec_lo -var s_restore_spi_init_hi = exec_hi - -var s_restore_mem_offset = ttmp12 -var s_restore_alloc_size = ttmp3 -var s_restore_tmp = ttmp6 -var s_restore_mem_offset_save = s_restore_tmp //no conflict - -var s_restore_m0 = s_restore_alloc_size //no conflict - -var s_restore_mode = ttmp13 -var s_restore_hwid1 = ttmp2 -var s_restore_ddid = s_restore_hwid1 -var s_restore_pc_lo = ttmp0 -var s_restore_pc_hi = ttmp1 -var s_restore_exec_lo = ttmp14 -var s_restore_exec_hi = ttmp15 -var s_restore_status = ttmp4 -var s_restore_trapsts = ttmp5 -//var s_restore_xnack_mask_lo = xnack_mask_lo -//var s_restore_xnack_mask_hi = xnack_mask_hi -var s_restore_xnack_mask = ttmp7 -var s_restore_buf_rsrc0 = ttmp8 -var s_restore_buf_rsrc1 = ttmp9 -var s_restore_buf_rsrc2 = ttmp10 -var s_restore_buf_rsrc3 = ttmp11 -var s_restore_size = ttmp13 //ttmp13 has no conflict - -/**************************************************************************/ -/* trap handler entry points */ -/**************************************************************************/ - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore - //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC - s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC - s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. - s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE - //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE - s_branch L_SKIP_RESTORE //NOT restore, SAVE actually - else - s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save - end + s_branch L_SKIP_RESTORE //NOT restore. 
might be a regular trap or save L_JUMP_TO_RESTORE: - s_branch L_RESTORE //restore + s_branch L_RESTORE L_SKIP_RESTORE: - - s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC - s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save - s_cbranch_scc1 L_SAVE //this is the operation for save - - // ********* Handle non-CWSR traps ******************* - if (!EMU_RUN_HACK) - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception - s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly. - s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0 - - L_EXCP_CASE: - s_and_b32 ttmp1, ttmp1, 0xFFFF - s_rfe_b64 [ttmp0, ttmp1] - end - // ********* End handling of non-CWSR traps ******************* - -/**************************************************************************/ -/* save routine */ -/**************************************************************************/ + s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save + s_cbranch_scc1 L_SAVE + + // If STATUS.MEM_VIOL is asserted then halt the wave to prevent + // the exception raising again and blocking context save. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK + s_cbranch_scc0 L_FETCH_2ND_TRAP + s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK + +L_FETCH_2ND_TRAP: + // Preserve and clear scalar XNACK state before issuing scalar loads. + // Save IB_STS.REPLAY_W64H[25], RCNT[21:16], FIRST_REPLAY[15] into + // unused space ttmp11[31:24]. + s_andn2_b32 ttmp11, ttmp11, (TTMP11_SAVE_REPLAY_W64H_MASK | TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK) + s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS) + s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK + s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT) + s_or_b32 ttmp11, ttmp11, ttmp3 + s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_or_b32 ttmp11, ttmp11, ttmp3 + s_andn2_b32 ttmp2, ttmp2, (SQ_WAVE_IB_STS_REPLAY_W64H_MASK | SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK) + s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + + // Read second-level TBA/TMA from first-level TMA and jump if available. 
+	// ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
+	// ttmp12 holds SQ_WAVE_STATUS
+	s_getreg_b32	ttmp4, hwreg(HW_REG_SHADER_TMA_LO)
+	s_getreg_b32	ttmp5, hwreg(HW_REG_SHADER_TMA_HI)
+	s_lshl_b64	[ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
+	s_load_dwordx2	[ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1	// second-level TBA
+	s_waitcnt	lgkmcnt(0)
+	s_load_dwordx2	[ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1	// second-level TMA
+	s_waitcnt	lgkmcnt(0)
+	s_and_b64	[ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
+	s_cbranch_scc0	L_NO_NEXT_TRAP	// second-level trap handler has not been set
+	s_setpc_b64	[ttmp2, ttmp3]	// jump to second-level trap handler
+
+L_NO_NEXT_TRAP:
+	s_getreg_b32	s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+	s_and_b32	s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK
+	s_cbranch_scc1	L_EXCP_CASE	// Exception, jump back to the shader program directly.
+	s_add_u32	ttmp0, ttmp0, 4	// S_TRAP case, add 4 to ttmp0
+	s_addc_u32	ttmp1, ttmp1, 0
+L_EXCP_CASE:
+	s_and_b32	ttmp1, ttmp1, 0xFFFF
+
+	// Restore SQ_WAVE_IB_STS.
+	s_lshr_b32	ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+	s_and_b32	ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+	s_lshr_b32	ttmp2, ttmp11, (TTMP11_SAVE_REPLAY_W64H_SHIFT - SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT)
+	s_and_b32	ttmp2, ttmp2, SQ_WAVE_IB_STS_REPLAY_W64H_MASK
+	s_or_b32	ttmp2, ttmp2, ttmp3
+	s_setreg_b32	hwreg(HW_REG_IB_STS), ttmp2
+
+	// Restore SQ_WAVE_STATUS.
+	s_and_b64	exec, exec, exec	// Restore STATUS.EXECZ, not writable by s_setreg_b32
+	s_and_b64	vcc, vcc, vcc	// Restore STATUS.VCCZ, not writable by s_setreg_b32
+	s_setreg_b32	hwreg(HW_REG_STATUS), s_save_status
-L_SAVE:
-
+	s_rfe_b64	[ttmp0, ttmp1]
+
+L_SAVE:
 	//check whether there is mem_viol
-	s_getreg_b32	s_save_trapsts, hwreg(HW_REG_TRAPSTS)
-	s_and_b32	s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
+	s_getreg_b32	s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+	s_and_b32	s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
 	s_cbranch_scc0	L_NO_PC_REWIND
-
+	//if so, need rewind PC assuming GDS operation gets NACKed
-	s_mov_b32	s_save_tmp, 0	//clear mem_viol bit
-	s_setreg_b32	hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp	//clear mem_viol bit
-	s_and_b32	s_save_pc_hi, s_save_pc_hi, 0x0000ffff	//pc[47:32]
-	s_sub_u32	s_save_pc_lo, s_save_pc_lo, 8	//pc[31:0]-8
-	s_subb_u32	s_save_pc_hi, s_save_pc_hi, 0x0	// -scc
+	s_mov_b32	s_save_tmp, 0
+	s_setreg_b32	hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp	//clear mem_viol bit
+	s_and_b32	s_save_pc_hi, s_save_pc_hi, 0x0000ffff	//pc[47:32]
+	s_sub_u32	s_save_pc_lo, s_save_pc_lo, 8	//pc[31:0]-8
+	s_subb_u32	s_save_pc_hi, s_save_pc_hi, 0x0
 L_NO_PC_REWIND:
-	s_mov_b32	s_save_tmp, 0	//clear saveCtx bit
-	s_setreg_b32	hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp	//clear saveCtx bit
+	s_mov_b32	s_save_tmp, 0
+	s_setreg_b32	hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp	//clear saveCtx bit
-	//s_mov_b32	s_save_xnack_mask_lo, xnack_mask_lo	//save XNACK_MASK
-	//s_mov_b32	s_save_xnack_mask_hi, xnack_mask_hi
-	s_getreg_b32	s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
-	s_getreg_b32	s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)	//save RCNT
-	s_lshl_b32	s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
-	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp
-	s_getreg_b32	s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE)	//save FIRST_REPLAY
-	s_lshl_b32	s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
-	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp
-	s_getreg_b32	s_save_tmp, hwreg(HW_REG_IB_STS)	//clear RCNT and FIRST_REPLAY in IB_STS
-	s_and_b32	s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
+	s_getreg_b32	s_save_xnack_mask, hwreg(HW_REG_SHADER_XNACK_MASK)
+	s_getreg_b32	s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE)
+	s_lshl_b32	s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
+	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp
+	s_getreg_b32	s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE)
+	s_lshl_b32	s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp
+	s_getreg_b32	s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT, SQ_WAVE_IB_STS_REPLAY_W64H_SIZE)
+	s_lshl_b32	s_save_tmp, s_save_tmp, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
+	s_or_b32	s_save_pc_hi, s_save_pc_hi, s_save_tmp
+	s_getreg_b32	s_save_tmp, hwreg(HW_REG_IB_STS)	//clear RCNT and FIRST_REPLAY and REPLAY_W64H in IB_STS
+	s_and_b32	s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
 	s_setreg_b32	hwreg(HW_REG_IB_STS), s_save_tmp
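The block just above parks three IB_STS fields (RCNT, FIRST_REPLAY, REPLAY_W64H) in the otherwise-unused high bits of the saved PC so they survive the context save. A minimal C model of that packing, using the shift/size values declared at the top of the new handler; this is an illustrative sketch, not part of the patch:

	#include <stdint.h>

	/* Shift/size values as declared in the handler above. */
	#define IB_STS_RCNT_SHIFT          16  /* RCNT[21:16], 6 bits */
	#define IB_STS_RCNT_SIZE           6
	#define IB_STS_FIRST_REPLAY_SHIFT  15  /* FIRST_REPLAY[15]    */
	#define IB_STS_REPLAY_W64H_SHIFT   25  /* REPLAY_W64H[25]     */
	#define PC_HI_RCNT_SHIFT           26  /* -> pc_hi[31:26]     */
	#define PC_HI_FIRST_REPLAY_SHIFT   25  /* -> pc_hi[25]        */
	#define PC_HI_REPLAY_W64H_SHIFT    24  /* -> pc_hi[24]        */

	static uint32_t pack_ib_sts_into_pc_hi(uint32_t pc_hi, uint32_t ib_sts)
	{
		uint32_t rcnt  = (ib_sts >> IB_STS_RCNT_SHIFT) &
				 ((1u << IB_STS_RCNT_SIZE) - 1);
		uint32_t first = (ib_sts >> IB_STS_FIRST_REPLAY_SHIFT) & 1u;
		uint32_t w64h  = (ib_sts >> IB_STS_REPLAY_W64H_SHIFT) & 1u;

		/* PC[47:32] lives in pc_hi[15:0]; the spare high bits carry state */
		return pc_hi | (rcnt << PC_HI_RCNT_SHIFT) |
			       (first << PC_HI_FIRST_REPLAY_SHIFT) |
			       (w64h << PC_HI_REPLAY_W64H_SHIFT);
	}

The restore path at L_EXCP_CASE performs the mirror-image shifts to put the fields back into HW_REG_IB_STS.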
-
-	/* inform SPI the readiness and wait for SPI's go signal */
-	s_mov_b32	s_save_exec_lo, exec_lo	//save EXEC and use EXEC for the go signal from SPI
-	s_mov_b32	s_save_exec_hi, exec_hi
-	s_mov_b64	exec, 0x0	//clear EXEC to get ready to receive
-	if (EMU_RUN_HACK)
-
-	else
-	s_sendmsg	sendmsg(MSG_SAVEWAVE)	//send SPI a message and wait for SPI's write to EXEC
-	end
-
-	L_SLEEP:
-	s_sleep 0x2
-
-	if (EMU_RUN_HACK)
-
-	else
-	s_cbranch_execz	L_SLEEP
-	end
-
-
-	/* setup Resource Contants */
-	if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE))
-	//calculate wd_addr using absolute thread id
-	v_readlane_b32	s_save_tmp, v9, 0
-	//determine it is wave32 or wave64
-	s_getreg_b32	s_wave_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
-	s_cmp_eq_u32	s_wave_size, 0
-	s_cbranch_scc1	L_SAVE_WAVE32
-	s_lshr_b32	s_save_tmp, s_save_tmp, 6	//SAVE WAVE64
-	s_branch	L_SAVE_CON
-	L_SAVE_WAVE32:
-	s_lshr_b32	s_save_tmp, s_save_tmp, 5	//SAVE WAVE32
-	L_SAVE_CON:
-	s_mul_i32	s_save_tmp, s_save_tmp, WAVE_SPACE
-	s_add_i32	s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
-	s_mov_b32	s_save_spi_init_hi, WG_BASE_ADDR_HI
-	s_and_b32	s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
-	else
-	end
-	if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE))
-	s_add_i32	s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
-	s_mov_b32	s_save_spi_init_hi, WG_BASE_ADDR_HI
-	s_and_b32	s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
-	else
-	end
-
-
-	s_mov_b32	s_save_buf_rsrc0, s_save_spi_init_lo	//base_addr_lo
-	s_and_b32	s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF	//base_addr_hi
-	s_or_b32	s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
-	s_mov_b32	s_save_buf_rsrc2, 0	//NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited
-	s_mov_b32	s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
-	s_and_b32	s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
-	s_lshr_b32	s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)	//get ATC bit into position
-	s_or_b32	s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp	//or ATC
-	s_and_b32	s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
-	s_lshr_b32	s_save_tmp, s_save_tmp,
(S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position - s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE - - s_mov_b32 s_save_m0, m0 //save M0 - - /* global mem offset */ - s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0 - s_getreg_b32 s_wave_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE) //get wave_save_size - s_or_b32 s_wave_size, s_save_spi_init_hi, s_wave_size //share s_wave_size with exec_hi - - /* save VGPRs */ - ////////////////////////////// - L_SAVE_VGPR: - - s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on - s_and_b32 m0, s_wave_size, 1 - s_cmp_eq_u32 m0, 1 - s_cbranch_scc1 L_ENABLE_SAVE_VGPR_EXEC_HI - s_mov_b32 exec_hi, 0x00000000 - s_branch L_SAVE_VGPR_NORMAL - L_ENABLE_SAVE_VGPR_EXEC_HI: - s_mov_b32 exec_hi, 0xFFFFFFFF - L_SAVE_VGPR_NORMAL: - s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size - //for wave32 and wave64, the num of vgpr function is the same? - s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 - s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible - //determine it is wave32 or wave64 - s_and_b32 m0, s_wave_size, 1 - s_cmp_eq_u32 m0, 1 - s_cbranch_scc1 L_SAVE_VGPR_WAVE64 - - //zhenxu added it for save vgpr for wave32 - s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 7 //NUM_RECORDS in bytes (32 threads*4) - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - s_mov_b32 m0, 0x0 //VGPR initial index value =0 - //s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 - //s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later, doesn't need this in gfx10 - - L_SAVE_VGPR_WAVE32_LOOP: - v_movrels_b32 v0, v0 //v0 = v[0+m0] - - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else - buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 - end - - s_add_u32 m0, m0, 1 //next vgpr index - s_add_u32 s_save_mem_offset, s_save_mem_offset, 128 //every buffer_store_dword does 128 bytes - s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_SAVE_VGPR_WAVE32_LOOP //VGPR save is complete? - s_branch L_SAVE_LDS - //save vgpr for wave32 ends - - L_SAVE_VGPR_WAVE64: - s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
- else - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - s_mov_b32 m0, 0x0 //VGPR initial index value =0 - //s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 - //s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later, doesn't need this in gfx10 - - L_SAVE_VGPR_WAVE64_LOOP: - v_movrels_b32 v0, v0 //v0 = v[0+m0] - - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else - buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 - end - - s_add_u32 m0, m0, 1 //next vgpr index - s_add_u32 s_save_mem_offset, s_save_mem_offset, 256 //every buffer_store_dword does 256 bytes - s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_SAVE_VGPR_WAVE64_LOOP //VGPR save is complete? - //s_set_gpr_idx_off - // - //Below part will be the save shared vgpr part (new for gfx10) - s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size - s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? - s_cbranch_scc0 L_SAVE_LDS //no shared_vgpr used? jump to L_SAVE_LDS - s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) - //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. - //save shared_vgpr will start from the index of m0 - s_add_u32 s_save_alloc_size, s_save_alloc_size, m0 - s_mov_b32 exec_lo, 0xFFFFFFFF - s_mov_b32 exec_hi, 0x00000000 - L_SAVE_SHARED_VGPR_WAVE64_LOOP: - v_movrels_b32 v0, v0 //v0 = v[0+m0] - buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 - s_add_u32 m0, m0, 1 //next vgpr index - s_add_u32 s_save_mem_offset, s_save_mem_offset, 128 //every buffer_store_dword does 256 bytes - s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR save is complete? - - /* save LDS */ - ////////////////////////////// - L_SAVE_LDS: - - //Only check the first wave need LDS - /* the first wave in the threadgroup */ - s_barrier //FIXME not performance-optimal "LDS is used? wait for other waves in the same TG" - s_and_b32 s_save_tmp, s_wave_size, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here - s_cbranch_scc0 L_SAVE_SGPR - - s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on - s_and_b32 m0, s_wave_size, 1 - s_cmp_eq_u32 m0, 1 - s_cbranch_scc1 L_ENABLE_SAVE_LDS_EXEC_HI - s_mov_b32 exec_hi, 0x00000000 - s_branch L_SAVE_LDS_NORMAL - L_ENABLE_SAVE_LDS_EXEC_HI: - s_mov_b32 exec_hi, 0xFFFFFFFF - L_SAVE_LDS_NORMAL: - s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size - s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? - s_cbranch_scc0 L_SAVE_SGPR //no lds used? jump to L_SAVE_VGPR - s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw - s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes - s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
- else - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - //load 0~63*4(byte address) to vgpr v15 - v_mbcnt_lo_u32_b32 v0, -1, 0 - v_mbcnt_hi_u32_b32 v0, -1, v0 - v_mul_u32_u24 v0, 4, v0 - - s_and_b32 m0, s_wave_size, 1 - s_cmp_eq_u32 m0, 1 - s_mov_b32 m0, 0x0 - s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 - - L_SAVE_LDS_LOOP_W32: - if (SAVE_LDS) - ds_read_b32 v1, v0 - s_waitcnt 0 //ensure data ready - buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 - //buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 //save lds to memory doesn't exist in 10 - end - s_add_u32 m0, m0, 128 //every buffer_store_lds does 128 bytes - s_add_u32 s_save_mem_offset, s_save_mem_offset, 128 //mem offset increased by 128 bytes - v_add_nc_u32 v0, v0, 128 - s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_SAVE_LDS_LOOP_W32 //LDS save is complete? - s_branch L_SAVE_SGPR - - L_SAVE_LDS_LOOP_W64: - if (SAVE_LDS) - ds_read_b32 v1, v0 - s_waitcnt 0 //ensure data ready - buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 - //buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 //save lds to memory doesn't exist in 10 - end - s_add_u32 m0, m0, 256 //every buffer_store_lds does 256 bytes - s_add_u32 s_save_mem_offset, s_save_mem_offset, 256 //mem offset increased by 256 bytes - v_add_nc_u32 v0, v0, 256 - s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 //LDS save is complete? - - - /* save SGPRs */ - ////////////////////////////// - //s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size - //s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 - //s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) - //s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //In gfx10, Number of SGPRs = (sgpr_size + 1) * 8 (non-zero value) - L_SAVE_SGPR: - //need to look at it is wave32 or wave64 - s_and_b32 m0, s_wave_size, 1 - s_cmp_eq_u32 m0, 1 - s_cbranch_scc1 L_SAVE_SGPR_VMEM_WAVE64 - if (SGPR_SAVE_USE_SQC) - s_lshl_b32 s_save_buf_rsrc2, s_sgpr_save_num, 2 //NUM_RECORDS in bytes - else - s_lshl_b32 s_save_buf_rsrc2, s_sgpr_save_num, 7 //NUM_RECORDS in bytes (32 threads) - end - s_branch L_SAVE_SGPR_CONT - L_SAVE_SGPR_VMEM_WAVE64: - if (SGPR_SAVE_USE_SQC) - s_lshl_b32 s_save_buf_rsrc2, s_sgpr_save_num, 2 //NUM_RECORDS in bytes - else - s_lshl_b32 s_save_buf_rsrc2, s_sgpr_save_num, 8 //NUM_RECORDS in bytes (64 threads) - end - L_SAVE_SGPR_CONT: - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
- else - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - //s_mov_b32 m0, 0x0 //SGPR initial index value =0 - //s_nop 0x0 //Manually inserted wait states - - s_and_b32 m0, s_wave_size, 1 - s_cmp_eq_u32 m0, 1 - - s_mov_b32 m0, 0x0 //SGPR initial index value =0 - s_nop 0x0 //Manually inserted wait states - - s_cbranch_scc1 L_SAVE_SGPR_LOOP_WAVE64 - - L_SAVE_SGPR_LOOP_WAVE32: - s_movrels_b32 s0, s0 //s0 = s[0+m0] - //zhenxu, adding one more argument to save sgpr function, this is only for vmem, using sqc is not change - write_sgpr_to_mem_wave32(s0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PV: the best performance should be using s_buffer_store_dwordx4 - s_add_u32 m0, m0, 1 //next sgpr index - s_cmp_lt_u32 m0, s_sgpr_save_num //scc = (m0 < s_sgpr_save_num) ? 1 : 0 - s_cbranch_scc1 L_SAVE_SGPR_LOOP_WAVE32 //SGPR save is complete? - s_branch L_SAVE_HWREG - - L_SAVE_SGPR_LOOP_WAVE64: - s_movrels_b32 s0, s0 //s0 = s[0+m0] - //zhenxu, adding one more argument to save sgpr function, this is only for vmem, using sqc is not change - write_sgpr_to_mem_wave64(s0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PV: the best performance should be using s_buffer_store_dwordx4 - s_add_u32 m0, m0, 1 //next sgpr index - s_cmp_lt_u32 m0, s_sgpr_save_num //scc = (m0 < s_sgpr_save_num) ? 1 : 0 - s_cbranch_scc1 L_SAVE_SGPR_LOOP_WAVE64 //SGPR save is complete? - - - /* save HW registers */ - ////////////////////////////// - L_SAVE_HWREG: - s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - s_and_b32 m0, s_wave_size, 1 - s_cmp_eq_u32 m0, 1 - s_cbranch_scc1 L_SAVE_HWREG_WAVE64 - - write_sgpr_to_mem_wave32(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //M0 - - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) - s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 - s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over - end - - write_sgpr_to_mem_wave32(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PC - write_sgpr_to_mem_wave32(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - write_sgpr_to_mem_wave32(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //EXEC - write_sgpr_to_mem_wave32(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - write_sgpr_to_mem_wave32(s_save_status, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //STATUS - - //s_save_trapsts conflicts with s_save_alloc_size - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - write_sgpr_to_mem_wave32(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //TRAPSTS - - //write_sgpr_to_mem_wave32(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_LO - write_sgpr_to_mem_wave32(s_save_xnack_mask, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_HI - - //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2 - s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE - 
write_sgpr_to_mem_wave32(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - if(SAVE_RESTORE_HWID_DDID) - s_getreg_b32 s_save_m0, hwreg(HW_REG_HW_ID1) //HW_ID1, handler records the SE/SA/WGP/SIMD/wave of the original wave - write_sgpr_to_mem_wave32(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - end - s_branch L_S_PGM_END_SAVED - - L_SAVE_HWREG_WAVE64: - write_sgpr_to_mem_wave64(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //M0 - - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) - s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 - s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over - end - - write_sgpr_to_mem_wave64(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //PC - write_sgpr_to_mem_wave64(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - write_sgpr_to_mem_wave64(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //EXEC - write_sgpr_to_mem_wave64(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - write_sgpr_to_mem_wave64(s_save_status, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //STATUS - - //s_save_trapsts conflicts with s_save_alloc_size - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - write_sgpr_to_mem_wave64(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //TRAPSTS - - //write_sgpr_to_mem_wave64(s_save_xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_LO - write_sgpr_to_mem_wave64(s_save_xnack_mask, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) //XNACK_MASK_HI - - //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2 - s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE - write_sgpr_to_mem_wave64(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - - - if(SAVE_RESTORE_HWID_DDID) - s_getreg_b32 s_save_m0, hwreg(HW_REG_HW_ID1) //HW_ID1, handler records the SE/SA/WGP/SIMD/wave of the original wave - write_sgpr_to_mem_wave64(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF) - - /* save DDID */ - ////////////////////////////// - L_SAVE_DDID: - //EXEC has been saved, no vector inst following - s_mov_b32 exec_lo, 0x80000000 //Set MSB to 1. Cleared when draw index is returned - s_sendmsg sendmsg(MSG_GET_DDID) - - L_WAIT_DDID_LOOP: - s_nop 7 // sleep a bit - s_bitcmp0_b32 exec_lo, 31 // test to see if MSB is cleared, meaning done - s_cbranch_scc0 L_WAIT_DDID_LOOP - - s_mov_b32 s_save_m0, exec_lo - - - s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
-	else
-	s_mov_b32	s_save_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
-	end
-	s_and_b32	m0, s_wave_size, 1
-	s_cmp_eq_u32	m0, 1
-	s_cbranch_scc1	L_SAVE_DDID_WAVE64
-
-	write_sgpr_to_mem_wave32(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF)
-
-	L_SAVE_DDID_WAVE64:
-	write_sgpr_to_mem_wave64(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset, SGPR_SAVE_USE_SQC, USE_MTBUF_INSTEAD_OF_MUBUF)
-
-	end
-
-	L_S_PGM_END_SAVED:
-	/* S_PGM_END_SAVED */	//FIXME graphics ONLY
-	if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT))
-	s_and_b32	s_save_pc_hi, s_save_pc_hi, 0x0000ffff	//pc[47:32]
-	s_add_u32	s_save_pc_lo, s_save_pc_lo, 4	//pc[31:0]+4
-	s_addc_u32	s_save_pc_hi, s_save_pc_hi, 0x0	//carry bit over
-	s_rfe_b64	s_save_pc_lo	//Return to the main shader program
-	else
-	end
-
-
-	s_branch	L_END_PGM
-
-
-
-/**************************************************************************/
-/* restore routine */
-/**************************************************************************/
+
+	/* inform SPI the readiness and wait for SPI's go signal */
+	s_mov_b32	s_save_exec_lo, exec_lo	//save EXEC and use EXEC for the go signal from SPI
+	s_mov_b32	s_save_exec_hi, exec_hi
+	s_mov_b64	exec, 0x0	//clear EXEC to get ready to receive
+
+	s_sendmsg	sendmsg(MSG_SAVEWAVE)	//send SPI a message and wait for SPI's write to EXEC
+
+L_SLEEP:
+	// sleep 1 (64clk) is not enough for 8 waves per SIMD: it can cause an SQ hang,
+	// since the 7th/8th wave cannot win arbitration to execute an instruction while
+	// the other waves are stuck in the sleep loop waiting for wrexec != 0
+	s_sleep 0x2
+	s_cbranch_execz L_SLEEP
+
+	/* setup Resource Constants */
+	s_mov_b32	s_save_buf_rsrc0, s_save_spi_init_lo	//base_addr_lo
+	s_and_b32	s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF	//base_addr_hi
+	s_or_b32	s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
+	s_mov_b32	s_save_buf_rsrc2, 0	//NUM_RECORDS initial value = 0 (in bytes), though not necessarily initialized
+	s_mov_b32	s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
+	s_and_b32	s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
+	s_lshr_b32	s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
+	s_or_b32	s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp	//or ATC
+	s_and_b32	s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
+	s_lshr_b32	s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
+	s_or_b32	s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp	//or MTYPE
+
+	s_mov_b32	s_save_m0, m0
+
+	/* global mem offset */
+	s_mov_b32	s_save_mem_offset, 0x0
+	s_getreg_b32	s_wave_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
+	s_lshl_b32	s_wave_size, s_wave_size, S_WAVE_SIZE
+	s_or_b32	s_wave_size, s_save_spi_init_hi, s_wave_size	//share s_wave_size with exec_hi, it's at bit25
+
+	/* save HW registers */
+
+L_SAVE_HWREG:
+	// HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
+	get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+	get_svgpr_size_bytes(s_save_tmp)
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, s_save_tmp
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+	write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
+	write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+	write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset)
+	write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
+	write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset)
+
+	s_getreg_b32	s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+	write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset)
+	write_hwreg_to_mem(s_save_xnack_mask, s_save_buf_rsrc0, s_save_mem_offset)
+
+	s_getreg_b32	s_save_m0, hwreg(HW_REG_MODE)
+	write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+
+	s_getreg_b32	s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO)
+	write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+
+	s_getreg_b32	s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI)
+	write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
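For orientation, the offset arithmetic above implies a fixed per-wave save-area layout: VGPRs first, then shared VGPRs, then SGPRs, then HWREGs, then LDS. A hedged C sketch of that arithmetic; the names mirror the get_*_size_bytes helpers used above, but the byte counts they return are defined elsewhere in this file, so treat them as parameters here:

	struct save_area_layout {
		unsigned int sgpr_off, hwreg_off, lds_off;
	};

	static struct save_area_layout layout(unsigned int vgpr_bytes,
					      unsigned int svgpr_bytes,
					      unsigned int sgpr_bytes,
					      unsigned int hwreg_bytes)
	{
		struct save_area_layout l;

		l.sgpr_off  = vgpr_bytes + svgpr_bytes;  /* SGPR SR offset  */
		l.hwreg_off = l.sgpr_off + sgpr_bytes;   /* HWREG SR offset */
		l.lds_off   = l.hwreg_off + hwreg_bytes; /* LDS SR offset   */
		return l;
	}

The wave32/wave64 flag that feeds get_vgpr_size_bytes() is the bit packed into s_wave_size at bit 25 (S_WAVE_SIZE) just above.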
+
+	/* the first wave in the threadgroup */
+	s_and_b32	s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+	s_mov_b32	s_save_exec_hi, 0x0
+	s_or_b32	s_save_exec_hi, s_save_tmp, s_save_exec_hi	// save first wave bit to s_save_exec_hi.bits[26]
+
+	/* save SGPRs */
+	// Save SGPRs before the LDS save, so that s0 through s4 can be used during the LDS save...
+
+	// SGPR SR memory offset : size(VGPR)+size(SVGPR)
+	get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+	get_svgpr_size_bytes(s_save_tmp)
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, s_save_tmp
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	// back up s_save_buf_rsrc0 into s_save_xnack_mask, since the write_16sgpr_to_mem function changes the rsrc
+	s_mov_b32	s_save_xnack_mask, s_save_buf_rsrc0
+	s_add_u32	s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
+	s_addc_u32	s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+
+	s_mov_b32	m0, 0x0	//SGPR initial index value =0
+	s_nop	0x0	//Manually inserted wait states
+L_SAVE_SGPR_LOOP:
+	// SGPRs are allocated in 16-SGPR granularity
+	s_movrels_b64	s0, s0	//s0 = s[0+m0], s1 = s[1+m0]
+	s_movrels_b64	s2, s2	//s2 = s[2+m0], s3 = s[3+m0]
+	s_movrels_b64	s4, s4	//s4 = s[4+m0], s5 = s[5+m0]
+	s_movrels_b64	s6, s6	//s6 = s[6+m0], s7 = s[7+m0]
+	s_movrels_b64	s8, s8	//s8 = s[8+m0], s9 = s[9+m0]
+	s_movrels_b64	s10, s10	//s10 = s[10+m0], s11 = s[11+m0]
+	s_movrels_b64	s12, s12	//s12 = s[12+m0], s13 = s[13+m0]
+	s_movrels_b64	s14, s14	//s14 = s[14+m0], s15 = s[15+m0]
+
+	write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
+	s_add_u32	m0, m0, 16	//next sgpr index
+	s_cmp_lt_u32	m0, 96	//scc = (m0 < first 96 SGPRs) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_SGPR_LOOP	//first 96 SGPRs saved?
+
+	//save the remaining 12 SGPRs
+	s_movrels_b64	s0, s0	//s0 = s[0+m0], s1 = s[1+m0]
+	s_movrels_b64	s2, s2	//s2 = s[2+m0], s3 = s[3+m0]
+	s_movrels_b64	s4, s4	//s4 = s[4+m0], s5 = s[5+m0]
+	s_movrels_b64	s6, s6	//s6 = s[6+m0], s7 = s[7+m0]
+	s_movrels_b64	s8, s8	//s8 = s[8+m0], s9 = s[9+m0]
+	s_movrels_b64	s10, s10	//s10 = s[10+m0], s11 = s[11+m0]
+	write_12sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset)
+
+	// restore s_save_buf_rsrc0
+	s_mov_b32	s_save_buf_rsrc0, s_save_xnack_mask
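The loop above covers s_sgpr_save_num = 108 SGPRs: the first 96 in 16-register bursts, then a final 12-register burst. A C model of the same split; illustrative only, since the write_16sgpr_to_mem/write_12sgpr_to_mem macros are defined elsewhere in this file:

	#include <stdint.h>
	#include <string.h>

	static void save_sgprs(uint32_t *dst, const uint32_t *sgpr)
	{
		int m0;

		/* first 96 SGPRs, 16-SGPR granularity */
		for (m0 = 0; m0 < 96; m0 += 16)
			memcpy(dst + m0, sgpr + m0, 16 * sizeof(*sgpr));
		/* remaining 12 SGPRs (96..107) in one burst */
		memcpy(dst + 96, sgpr + 96, 12 * sizeof(*sgpr));
	}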
+	/* save the first 4 VGPRs, so the LDS save below can use them */
+	// each wave allocates at least 4 VGPRs...
+
+	s_mov_b32	s_save_mem_offset, 0
+	s_mov_b32	exec_lo, 0xFFFFFFFF	//need every thread from now on
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_SAVE_4VGPR_EXEC_HI
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_SAVE_4VGPR_WAVE32
+L_ENABLE_SAVE_4VGPR_EXEC_HI:
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+	s_branch	L_SAVE_4VGPR_WAVE64
+L_SAVE_4VGPR_WAVE32:
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	// VGPRs are allocated in 4-GPR granularity
+
+	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
+	buffer_store_dword	v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
+	s_branch	L_SAVE_LDS
+
+L_SAVE_4VGPR_WAVE64:
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	// VGPRs are allocated in 4-GPR granularity
+
+	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+	buffer_store_dword	v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
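The offset:128/offset:256 operands above fall out of the lane-strided buffer layout: each VGPR occupies one row of (lanes x 4) bytes, so 32 x 4 = 128 bytes per row in wave32 and 64 x 4 = 256 in wave64, with v0..v3 landing at rows 0..3. A sketch of the addressing, under that assumption:

	/* Byte offset of VGPR v[reg], lane 'lane' within the save area (sketch). */
	static unsigned int vgpr_offset(unsigned int reg, unsigned int lane, int wave64)
	{
		unsigned int row_bytes = (wave64 ? 64u : 32u) * 4u;

		return reg * row_bytes + lane * 4u;
	}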
+
+	/* save LDS */
+
+L_SAVE_LDS:
+	// Change EXEC to all threads...
+	s_mov_b32	exec_lo, 0xFFFFFFFF	//need every thread from now on
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_SAVE_LDS_EXEC_HI
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_SAVE_LDS_NORMAL
+L_ENABLE_SAVE_LDS_EXEC_HI:
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+L_SAVE_LDS_NORMAL:
+	s_getreg_b32	s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+	s_and_b32	s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF	//lds_size is zero?
+	s_cbranch_scc0	L_SAVE_LDS_DONE	//no lds used? jump to L_SAVE_LDS_DONE
+
+	s_barrier	//LDS is used? wait for other waves in the same TG
+	s_and_b32	s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
+	s_cbranch_scc0	L_SAVE_LDS_DONE
+
+	// first wave does the LDS save;
+
+	s_lshl_b32	s_save_alloc_size, s_save_alloc_size, 6	//LDS size in dwords = lds_size * 64dw
+	s_lshl_b32	s_save_alloc_size, s_save_alloc_size, 2	//LDS size in bytes
+	s_mov_b32	s_save_buf_rsrc2, s_save_alloc_size	//NUM_RECORDS in bytes
+
+	// LDS at offset: size(VGPR)+size(SVGPR)+size(SGPR)+size(HWREG)
+	//
+	get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
+	get_svgpr_size_bytes(s_save_tmp)
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, s_save_tmp
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	//load 0~63*4 (byte address) into vgpr v0
+	v_mbcnt_lo_u32_b32	v0, -1, 0
+	v_mbcnt_hi_u32_b32	v0, -1, v0
+	v_mul_u32_u24	v0, 4, v0
+
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_mov_b32	m0, 0x0
+	s_cbranch_scc1	L_SAVE_LDS_W64
+
+L_SAVE_LDS_W32:
+	s_mov_b32	s3, 128
+	s_nop	0
+	s_nop	0
+	s_nop	0
+L_SAVE_LDS_LOOP_W32:
+	ds_read_b32	v1, v0
+	s_waitcnt	0
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+
+	s_add_u32	m0, m0, s3	//every buffer_store_dword does 128 bytes
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, s3
+	v_add_nc_u32	v0, v0, 128	//mem offset increased by 128 bytes
+	s_cmp_lt_u32	m0, s_save_alloc_size	//scc=(m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_LDS_LOOP_W32	//LDS save is complete?
+
+	s_branch	L_SAVE_LDS_DONE
+
+L_SAVE_LDS_W64:
+	s_mov_b32	s3, 256
+	s_nop	0
+	s_nop	0
+	s_nop	0
+L_SAVE_LDS_LOOP_W64:
+	ds_read_b32	v1, v0
+	s_waitcnt	0
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+
+	s_add_u32	m0, m0, s3	//every buffer_store_dword does 256 bytes
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, s3
+	v_add_nc_u32	v0, v0, 256	//mem offset increased by 256 bytes
+	s_cmp_lt_u32	m0, s_save_alloc_size	//scc=(m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_LDS_LOOP_W64	//LDS save is complete?
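Each trip through the LDS loops above moves one wave-wide row (s3 = 128 bytes in wave32, 256 in wave64), with m0 serving as both the LDS read cursor and the loop bound against the LDS allocation size. A C rendering of the same loop; a sketch only, relying on the 64-dword LDS allocation granularity above to make lds_bytes a multiple of the step:

	#include <stdint.h>
	#include <string.h>

	static void save_lds(uint8_t *dst, const uint8_t *lds,
			     unsigned int lds_bytes, int wave64)
	{
		unsigned int step = wave64 ? 256 : 128;	/* s3 in the handler */
		unsigned int m0;

		for (m0 = 0; m0 < lds_bytes; m0 += step)
			memcpy(dst + m0, lds + m0, step);
	}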
+
+L_SAVE_LDS_DONE:
+	/* save the rest of the VGPRs */
+L_SAVE_VGPR:
+	// VGPR SR memory offset: 0
+	s_mov_b32	exec_lo, 0xFFFFFFFF	//need every thread from now on
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_SAVE_VGPR_EXEC_HI
+	s_mov_b32	s_save_mem_offset, (0+128*4)	//skip the 4 VGPRs saved earlier (128*4 bytes)
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_SAVE_VGPR_NORMAL
+L_ENABLE_SAVE_VGPR_EXEC_HI:
+	s_mov_b32	s_save_mem_offset, (0+256*4)	//skip the 4 VGPRs saved earlier (256*4 bytes)
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+L_SAVE_VGPR_NORMAL:
+	s_getreg_b32	s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+	s_add_u32	s_save_alloc_size, s_save_alloc_size, 1
+	s_lshl_b32	s_save_alloc_size, s_save_alloc_size, 2	//Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+	//determine whether the wave is wave32 or wave64
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_SAVE_VGPR_WAVE64
+
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	// VGPRs are allocated in 4-VGPR granularity
+
+	// VGPR store using dw burst
+	s_mov_b32	m0, 0x4	//VGPR initial index value = 4
+	s_cmp_lt_u32	m0, s_save_alloc_size
+	s_cbranch_scc0	L_SAVE_VGPR_END
+
+L_SAVE_VGPR_W32_LOOP:
+	v_movrels_b32	v0, v0	//v0 = v[0+m0]
+	v_movrels_b32	v1, v1	//v1 = v[1+m0]
+	v_movrels_b32	v2, v2	//v2 = v[2+m0]
+	v_movrels_b32	v3, v3	//v3 = v[3+m0]
+
+	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*2
+	buffer_store_dword	v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:128*3
+
+	s_add_u32	m0, m0, 4	//next vgpr index
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, 128*4	//every buffer_store_dword does 128 bytes
+	s_cmp_lt_u32	m0, s_save_alloc_size	//scc = (m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_VGPR_W32_LOOP	//VGPR save is complete?
+
+	s_branch	L_SAVE_VGPR_END
+
+L_SAVE_VGPR_WAVE64:
+	s_mov_b32	s_save_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	// VGPR store using dw burst
+	s_mov_b32	m0, 0x4	//VGPR initial index value = 4
+	s_cmp_lt_u32	m0, s_save_alloc_size
+	s_cbranch_scc0	L_SAVE_VGPR_END
+
+L_SAVE_VGPR_W64_LOOP:
+	v_movrels_b32	v0, v0	//v0 = v[0+m0]
+	v_movrels_b32	v1, v1	//v1 = v[1+m0]
+	v_movrels_b32	v2, v2	//v2 = v[2+m0]
+	v_movrels_b32	v3, v3	//v3 = v[3+m0]
+
+	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+	buffer_store_dword	v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+	buffer_store_dword	v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+	buffer_store_dword	v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+
+	s_add_u32	m0, m0, 4	//next vgpr index
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, 256*4	//every buffer_store_dword does 256 bytes
+	s_cmp_lt_u32	m0, s_save_alloc_size	//scc = (m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_VGPR_W64_LOOP	//VGPR save is complete?
+
+	//The section below saves the shared VGPRs (new for gfx10)
+	s_getreg_b32	s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
+	s_and_b32	s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF	//shared_vgpr_size is zero?
+	s_cbranch_scc0	L_SAVE_VGPR_END	//no shared_vgpr used? jump to L_SAVE_VGPR_END
+	s_lshl_b32	s_save_alloc_size, s_save_alloc_size, 3	//Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
+	//m0 currently holds the normal VGPR count; add the shared VGPR count to it to get the total count.
+	//the shared VGPR save starts at index m0
+	s_add_u32	s_save_alloc_size, s_save_alloc_size, m0
+	s_mov_b32	exec_lo, 0xFFFFFFFF
+	s_mov_b32	exec_hi, 0x00000000
+L_SAVE_SHARED_VGPR_WAVE64_LOOP:
+	v_movrels_b32	v0, v0	//v0 = v[0+m0]
+	buffer_store_dword	v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+	s_add_u32	m0, m0, 1	//next vgpr index
+	s_add_u32	s_save_mem_offset, s_save_mem_offset, 128
+	s_cmp_lt_u32	m0, s_save_alloc_size	//scc = (m0 < s_save_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_SAVE_SHARED_VGPR_WAVE64_LOOP	//SHARED_VGPR save is complete?
+
+L_SAVE_VGPR_END:
+	s_branch	L_END_PGM
 L_RESTORE:
-	/* Setup Resource Contants */
-	if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
-	//calculate wd_addr using absolute thread id
-	v_readlane_b32 s_restore_tmp, v9, 0
-	//determine it is wave32 or wave64
-	s_getreg_b32	s_restore_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)	//change to ttmp13
-	s_cmp_eq_u32	s_restore_size, 0
-	s_cbranch_scc1	L_RESTORE_WAVE32
-	s_lshr_b32	s_restore_tmp, s_restore_tmp, 6	//SAVE WAVE64
-	s_branch	L_RESTORE_CON
-	L_RESTORE_WAVE32:
-	s_lshr_b32	s_restore_tmp, s_restore_tmp, 5	//SAVE WAVE32
-	L_RESTORE_CON:
-	s_mul_i32	s_restore_tmp, s_restore_tmp, WAVE_SPACE
-	s_add_i32	s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO
-	s_mov_b32	s_restore_spi_init_hi, WG_BASE_ADDR_HI
-	s_and_b32	s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL
-	else
-	end
-
-	s_mov_b32	s_restore_buf_rsrc0, s_restore_spi_init_lo	//base_addr_lo
-	s_and_b32	s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF	//base_addr_hi
-	s_or_b32	s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
-	s_mov_b32	s_restore_buf_rsrc2, 0	//NUM_RECORDS initial value = 0 (in bytes)
-	s_mov_b32	s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
-	s_and_b32	s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
-	s_lshr_b32	s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)	//get ATC bit into position
-	s_or_b32	s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp	//or ATC
-	s_and_b32	s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
-	s_lshr_b32	s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)	//get MTYPE bits into position
-	s_or_b32	s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp	//or MTYPE
-	//determine it is wave32 or wave64
-	s_getreg_b32	s_restore_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
-	s_or_b32	s_restore_size, s_restore_spi_init_hi, s_restore_size	//share s_wave_size with exec_hi
-
-	/* global mem offset */
-	s_mov_b32	s_restore_mem_offset, 0x0	//mem offset initial value = 0
-
-	/* restore VGPRs */
-	//////////////////////////////
-	L_RESTORE_VGPR:
-
-	s_mov_b32	exec_lo, 0xFFFFFFFF	//need every thread from now on	//be consistent with SAVE although can be moved ahead
-	s_and_b32	m0, s_restore_size, 1
-	s_cmp_eq_u32	m0, 1
-	s_cbranch_scc1	L_ENABLE_RESTORE_VGPR_EXEC_HI
-	s_mov_b32	exec_hi, 0x00000000
-	s_branch	L_RESTORE_VGPR_NORMAL
-	L_ENABLE_RESTORE_VGPR_EXEC_HI:
-	s_mov_b32	exec_hi, 0xFFFFFFFF
-	L_RESTORE_VGPR_NORMAL:
-	s_getreg_b32	s_restore_alloc_size,
hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size - s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 - s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) - //determine it is wave32 or wave64 - s_and_b32 m0, s_restore_size, 1 - s_cmp_eq_u32 m0, 1 - s_cbranch_scc1 L_RESTORE_VGPR_WAVE64 - - s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 7 //NUM_RECORDS in bytes (32 threads*4) - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 - s_mov_b32 m0, 1 //VGPR initial index value = 1 - //s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 - //s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later, might not need this in gfx10 - - L_RESTORE_VGPR_WAVE32_LOOP: - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 - end - s_waitcnt vmcnt(0) //ensure data ready - v_movreld_b32 v0, v0 //v[0+m0] = v0 - s_add_u32 m0, m0, 1 //next vgpr index - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //every buffer_load_dword does 128 bytes - s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete? - //s_set_gpr_idx_off - /* VGPR restore on v0 */ - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 - end - - s_branch L_RESTORE_LDS - - L_RESTORE_VGPR_WAVE64: - s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 - s_mov_b32 m0, 1 //VGPR initial index value = 1 - L_RESTORE_VGPR_WAVE64_LOOP: - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 - end - s_waitcnt vmcnt(0) //ensure data ready - v_movreld_b32 v0, v0 //v[0+m0] = v0 - s_add_u32 m0, m0, 1 //next vgpr index - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //every buffer_load_dword does 256 bytes - s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? 
- //s_set_gpr_idx_off - // - //Below part will be the restore shared vgpr part (new for gfx10) - s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size - s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? - s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used? jump to L_SAVE_LDS - s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) - //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. - //restore shared_vgpr will start from the index of m0 - s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0 - s_mov_b32 exec_lo, 0xFFFFFFFF - s_mov_b32 exec_hi, 0x00000000 - L_RESTORE_SHARED_VGPR_WAVE64_LOOP: - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 - s_waitcnt vmcnt(0) //ensure data ready - v_movreld_b32 v0, v0 //v[0+m0] = v0 - s_add_u32 m0, m0, 1 //next vgpr index - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //every buffer_load_dword does 256 bytes - s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? - - s_mov_b32 exec_hi, 0xFFFFFFFF //restore back exec_hi before restoring V0!! - - /* VGPR restore on v0 */ - L_RESTORE_V0: - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 - end - - - /* restore LDS */ - ////////////////////////////// - L_RESTORE_LDS: - - //Only need to check the first wave - /* the first wave in the threadgroup */ - s_and_b32 s_restore_tmp, s_restore_size, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK - s_cbranch_scc0 L_RESTORE_SGPR - - s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead - s_and_b32 m0, s_restore_size, 1 - s_cmp_eq_u32 m0, 1 - s_cbranch_scc1 L_ENABLE_RESTORE_LDS_EXEC_HI - s_mov_b32 exec_hi, 0x00000000 - s_branch L_RESTORE_LDS_NORMAL - L_ENABLE_RESTORE_LDS_EXEC_HI: - s_mov_b32 exec_hi, 0xFFFFFFFF - L_RESTORE_LDS_NORMAL: - s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size - s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero? - s_cbranch_scc0 L_RESTORE_SGPR //no lds used? jump to L_RESTORE_VGPR - s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw - s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes - s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
- else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - s_and_b32 m0, s_wave_size, 1 - s_cmp_eq_u32 m0, 1 - s_mov_b32 m0, 0x0 - s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 - - L_RESTORE_LDS_LOOP_W32: - if (SAVE_LDS) - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 - s_waitcnt 0 - end - s_add_u32 m0, m0, 128 //every buffer_load_dword does 256 bytes - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 256 bytes - s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_RESTORE_LDS_LOOP_W32 //LDS restore is complete? - s_branch L_RESTORE_SGPR - - L_RESTORE_LDS_LOOP_W64: - if (SAVE_LDS) - buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 - s_waitcnt 0 - end - s_add_u32 m0, m0, 256 //every buffer_load_dword does 256 bytes - s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256 bytes - s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 //LDS restore is complete? - - - /* restore SGPRs */ - ////////////////////////////// - //s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size - //s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 - //s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) - //s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SGPRs = (sgpr_size + 1) * 8 (non-zero value) - L_RESTORE_SGPR: - //need to look at it is wave32 or wave64 - s_and_b32 m0, s_restore_size, 1 - s_cmp_eq_u32 m0, 1 - s_cbranch_scc1 L_RESTORE_SGPR_VMEM_WAVE64 - if (SGPR_SAVE_USE_SQC) - s_lshl_b32 s_restore_buf_rsrc2, s_sgpr_save_num, 2 //NUM_RECORDS in bytes - else - s_lshl_b32 s_restore_buf_rsrc2, s_sgpr_save_num, 7 //NUM_RECORDS in bytes (32 threads) - end - s_branch L_RESTORE_SGPR_CONT - L_RESTORE_SGPR_VMEM_WAVE64: - if (SGPR_SAVE_USE_SQC) - s_lshl_b32 s_restore_buf_rsrc2, s_sgpr_save_num, 2 //NUM_RECORDS in bytes - else - s_lshl_b32 s_restore_buf_rsrc2, s_sgpr_save_num, 8 //NUM_RECORDS in bytes (64 threads) - end - - L_RESTORE_SGPR_CONT: - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - s_and_b32 m0, s_restore_size, 1 - s_cmp_eq_u32 m0, 1 - s_cbranch_scc1 L_RESTORE_SGPR_WAVE64 - - read_sgpr_from_mem_wave32(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //save s0 to s_restore_tmp - s_mov_b32 m0, 0x1 - - L_RESTORE_SGPR_LOOP_WAVE32: - read_sgpr_from_mem_wave32(s0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PV: further performance improvement can be made - s_waitcnt lgkmcnt(0) //ensure data ready - s_movreld_b32 s0, s0 //s[0+m0] = s0 - s_nop 0 // hazard SALU M0=> S_MOVREL - s_add_u32 m0, m0, 1 //next sgpr index - s_cmp_lt_u32 m0, s_sgpr_save_num //scc = (m0 < s_restore_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_RESTORE_SGPR_LOOP_WAVE32 //SGPR restore (except s0) is complete? 
- s_mov_b32 s0, s_restore_tmp /* SGPR restore on s0 */ - s_branch L_RESTORE_HWREG - - L_RESTORE_SGPR_WAVE64: - read_sgpr_from_mem_wave64(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //save s0 to s_restore_tmp - s_mov_b32 m0, 0x1 //SGPR initial index value =1 //go on with with s1 - - L_RESTORE_SGPR_LOOP_WAVE64: - read_sgpr_from_mem_wave64(s0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PV: further performance improvement can be made - s_waitcnt lgkmcnt(0) //ensure data ready - s_movreld_b32 s0, s0 //s[0+m0] = s0 - s_nop 0 // hazard SALU M0=> S_MOVREL - s_add_u32 m0, m0, 1 //next sgpr index - s_cmp_lt_u32 m0, s_sgpr_save_num //scc = (m0 < s_restore_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_RESTORE_SGPR_LOOP_WAVE64 //SGPR restore (except s0) is complete? - s_mov_b32 s0, s_restore_tmp /* SGPR restore on s0 */ - - - /* restore HW registers */ - ////////////////////////////// - L_RESTORE_HWREG: - s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - s_and_b32 m0, s_restore_size, 1 - s_cmp_eq_u32 m0, 1 - s_cbranch_scc1 L_RESTORE_HWREG_WAVE64 - - read_sgpr_from_mem_wave32(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //M0 - read_sgpr_from_mem_wave32(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PC - read_sgpr_from_mem_wave32(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) - read_sgpr_from_mem_wave32(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //EXEC - read_sgpr_from_mem_wave32(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) - read_sgpr_from_mem_wave32(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //STATUS - read_sgpr_from_mem_wave32(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //TRAPSTS - //read_sgpr_from_mem_wave32(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK_LO - //read_sgpr_from_mem_wave32(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK_HI - read_sgpr_from_mem_wave32(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //XNACK_MASK - read_sgpr_from_mem_wave32(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //MODE - if(SAVE_RESTORE_HWID_DDID) - read_sgpr_from_mem_wave32(s_restore_hwid1, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //HW_ID1 - end - s_branch L_RESTORE_HWREG_FINISH - - L_RESTORE_HWREG_WAVE64: - read_sgpr_from_mem_wave64(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //M0 - read_sgpr_from_mem_wave64(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //PC - read_sgpr_from_mem_wave64(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) - read_sgpr_from_mem_wave64(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //EXEC - read_sgpr_from_mem_wave64(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) - read_sgpr_from_mem_wave64(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC) //STATUS - read_sgpr_from_mem_wave64(s_restore_trapsts, s_restore_buf_rsrc0, 
s_restore_mem_offset, SGPR_SAVE_USE_SQC)	//TRAPSTS
-	//read_sgpr_from_mem_wave64(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC)	//XNACK_MASK_LO
-	//read_sgpr_from_mem_wave64(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC)	//XNACK_MASK_HI
-	read_sgpr_from_mem_wave64(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC)	//XNACK_MASK
-	read_sgpr_from_mem_wave64(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC)	//MODE
-	if(SAVE_RESTORE_HWID_DDID)
-	read_sgpr_from_mem_wave64(s_restore_hwid1, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC)	//HW_ID1
-	end
-	L_RESTORE_HWREG_FINISH:
-	s_waitcnt	lgkmcnt(0)	//from now on, it is safe to restore STATUS and IB_STS
-
-
-
-	if(SAVE_RESTORE_HWID_DDID)
-	L_RESTORE_DDID:
-	s_mov_b32	m0, s_restore_hwid1	//virture ttrace support: The save-context handler records the SE/SA/WGP/SIMD/wave of the original wave
-	s_ttracedata	//and then can output it as SHADER_DATA to ttrace on restore to provide a correlation across the save-restore
-
-	s_mov_b32	s_restore_buf_rsrc2, 0x4	//NUM_RECORDS in bytes
-	if (SWIZZLE_EN)
-	s_add_u32	s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0	//FIXME need to use swizzle to enable bounds checking?
-	else
-	s_mov_b32	s_restore_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
-	end
-
-	s_and_b32	m0, s_restore_size, 1
-	s_cmp_eq_u32	m0, 1
-	s_cbranch_scc1	L_RESTORE_DDID_WAVE64
-
-	read_sgpr_from_mem_wave32(s_restore_ddid, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC)
-	s_branch	L_RESTORE_DDID_FINISH
-	L_RESTORE_DDID_WAVE64:
-	read_sgpr_from_mem_wave64(s_restore_ddid, s_restore_buf_rsrc0, s_restore_mem_offset, SGPR_SAVE_USE_SQC)
-
-	L_RESTORE_DDID_FINISH:
-	s_waitcnt	lgkmcnt(0)
-	//s_mov_b32	m0, s_restore_ddid
-	//s_ttracedata
-	if (RESTORE_DDID_IN_SGPR18)
-	s_mov_b32	s18, s_restore_ddid
-	end
-
-	end
-
-	s_and_b32	s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff	//pc[47:32]	//Do it here in order not to affect STATUS
-
-	//for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
-	if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
-	s_add_u32	s_restore_pc_lo, s_restore_pc_lo, 8	//pc[31:0]+8	//two back-to-back s_trap are used (first for save and second for restore)
-	s_addc_u32	s_restore_pc_hi, s_restore_pc_hi, 0x0	//carry bit over
-	end
-	if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL))
-	s_add_u32	s_restore_pc_lo, s_restore_pc_lo, 4	//pc[31:0]+4	// save is hack through s_trap but restore is normal
-	s_addc_u32	s_restore_pc_hi, s_restore_pc_hi, 0x0	//carry bit over
-	end
-
-	s_mov_b32	m0, s_restore_m0
-	s_mov_b32	exec_lo, s_restore_exec_lo
-	s_mov_b32	exec_hi, s_restore_exec_hi
-
-	s_and_b32	s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
+	/* Setup Resource Constants */
+	s_mov_b32	s_restore_buf_rsrc0, s_restore_spi_init_lo	//base_addr_lo
+	s_and_b32	s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF	//base_addr_hi
+	s_or_b32	s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+	s_mov_b32	s_restore_buf_rsrc2, 0	//NUM_RECORDS initial value = 0 (in bytes)
+	s_mov_b32	s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+	s_and_b32	s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
+	s_lshr_b32	s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT)
+	s_or_b32	s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp	//or ATC
+	s_and_b32	s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
+	s_lshr_b32	s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT)
+	s_or_b32	s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp	//or MTYPE
+	//determine whether the wave is wave32 or wave64
+	s_getreg_b32	s_restore_size, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
+	s_lshl_b32	s_restore_size, s_restore_size, S_WAVE_SIZE
+	s_or_b32	s_restore_size, s_restore_spi_init_hi, s_restore_size
+
+	s_and_b32	s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+	s_cbranch_scc0	L_RESTORE_VGPR
+
+	/* restore LDS */
+L_RESTORE_LDS:
+	s_mov_b32	exec_lo, 0xFFFFFFFF	//need every thread from now on
+	s_lshr_b32	m0, s_restore_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_RESTORE_LDS_EXEC_HI
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_RESTORE_LDS_NORMAL
+L_ENABLE_RESTORE_LDS_EXEC_HI:
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+L_RESTORE_LDS_NORMAL:
+	s_getreg_b32	s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
+	s_and_b32	s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF	//lds_size is zero?
+	s_cbranch_scc0	L_RESTORE_VGPR	//no lds used? jump to L_RESTORE_VGPR
+	s_lshl_b32	s_restore_alloc_size, s_restore_alloc_size, 6	//LDS size in dwords = lds_size * 64dw
+	s_lshl_b32	s_restore_alloc_size, s_restore_alloc_size, 2	//LDS size in bytes
+	s_mov_b32	s_restore_buf_rsrc2, s_restore_alloc_size	//NUM_RECORDS in bytes
+
+	// LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
+	//
+	get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
+	get_svgpr_size_bytes(s_restore_tmp)
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()
+
+	s_mov_b32	s_restore_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	s_lshr_b32	m0, s_wave_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_mov_b32	m0, 0x0
+	s_cbranch_scc1	L_RESTORE_LDS_LOOP_W64
+
+L_RESTORE_LDS_LOOP_W32:
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1	//load 32 dwords (128 bytes) directly into LDS
+	s_add_u32	m0, m0, 128	//LDS offset advances 128 bytes per iteration
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 128	//mem offset increased by 128 bytes
+	s_cmp_lt_u32	m0, s_restore_alloc_size	//scc=(m0 < s_restore_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_RESTORE_LDS_LOOP_W32	//LDS restore is complete?
+	s_branch	L_RESTORE_VGPR
+
+L_RESTORE_LDS_LOOP_W64:
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1	//load 64 dwords (256 bytes) directly into LDS
+	s_add_u32	m0, m0, 256	//LDS offset advances 256 bytes per iteration
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 256	//mem offset increased by 256 bytes
+	s_cmp_lt_u32	m0, s_restore_alloc_size	//scc=(m0 < s_restore_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_RESTORE_LDS_LOOP_W64	//LDS restore is complete?
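+	// Granularity note (illustrative, not part of the original patch): with
+	// lds:1, one buffer_load_dword moves one dword per active lane straight
+	// into LDS, so a wave32 iteration transfers 32 lanes * 4 bytes = 128
+	// bytes and a wave64 iteration 64 lanes * 4 bytes = 256 bytes, matching
+	// the m0/offset increments above. E.g. a hypothetical 1 KiB LDS
+	// allocation takes 8 wave32 iterations or 4 wave64 iterations.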
+
+	/* restore VGPRs */
+L_RESTORE_VGPR:
+	// VGPR SR memory offset : 0
+	s_mov_b32	s_restore_mem_offset, 0x0
+	s_mov_b32	exec_lo, 0xFFFFFFFF	//need every thread from now on
+	s_lshr_b32	m0, s_restore_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_ENABLE_RESTORE_VGPR_EXEC_HI
+	s_mov_b32	exec_hi, 0x00000000
+	s_branch	L_RESTORE_VGPR_NORMAL
+L_ENABLE_RESTORE_VGPR_EXEC_HI:
+	s_mov_b32	exec_hi, 0xFFFFFFFF
+L_RESTORE_VGPR_NORMAL:
+	s_getreg_b32	s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
+	s_add_u32	s_restore_alloc_size, s_restore_alloc_size, 1
+	s_lshl_b32	s_restore_alloc_size, s_restore_alloc_size, 2	//Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+	//determine whether the wave is wave32 or wave64
+	s_lshr_b32	m0, s_restore_size, S_WAVE_SIZE
+	s_and_b32	m0, m0, 1
+	s_cmp_eq_u32	m0, 1
+	s_cbranch_scc1	L_RESTORE_VGPR_WAVE64
+
+	s_mov_b32	s_restore_buf_rsrc2, 0x1000000	//NUM_RECORDS in bytes
+
+	// VGPR load using dw burst
+	s_mov_b32	s_restore_mem_offset_save, s_restore_mem_offset	// restore starts with v4; v0..v3 are restored last
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 128*4
+	s_mov_b32	m0, 4	//VGPR initial index value = 4
+
+L_RESTORE_VGPR_WAVE32_LOOP:
+	buffer_load_dword	v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+	buffer_load_dword	v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128
+	buffer_load_dword	v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128*2
+	buffer_load_dword	v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:128*3
+	s_waitcnt	vmcnt(0)
+	v_movreld_b32	v0, v0	//v[0+m0] = v0
+	v_movreld_b32	v1, v1
+	v_movreld_b32	v2, v2
+	v_movreld_b32	v3, v3
+	s_add_u32	m0, m0, 4	//next vgpr index
+	s_add_u32	s_restore_mem_offset, s_restore_mem_offset, 128*4	//every buffer_load_dword does 128 bytes
+	s_cmp_lt_u32	m0, s_restore_alloc_size	//scc = (m0 < s_restore_alloc_size) ? 1 : 0
+	s_cbranch_scc1	L_RESTORE_VGPR_WAVE32_LOOP	//VGPR restore (except v0..v3) is complete?
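+	// Indexing sketch (hedged): v_movreld_b32 v0, v0 writes v[0+M0] and
+	// v_movreld_b32 v1, v1 writes v[1+M0], so with m0 starting at 4 the
+	// first pass restores v[4..7], the next v[8..11], and so on. v0..v3 are
+	// left for the final loads from s_restore_mem_offset_save below because
+	// they are still in use as the staging registers for every pass.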
+ + /* VGPR restore on v0 */ + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:128*3 + + s_branch L_RESTORE_SGPR + +L_RESTORE_VGPR_WAVE64: + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + // VGPR load using dw burst + s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_mov_b32 m0, 4 //VGPR initial index value = 4 + +L_RESTORE_VGPR_WAVE64_LOOP: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3 + s_waitcnt vmcnt(0) + v_movreld_b32 v0, v0 //v[0+m0] = v0 + v_movreld_b32 v1, v1 + v_movreld_b32 v2, v2 + v_movreld_b32 v3, v3 + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? + + //Below part will be the restore shared vgpr part (new for gfx10) + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size + s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero? + s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used? + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value) + //m0 now has the value of normal vgpr count, just add the m0 with shared_vgpr count to get the total count. + //restore shared_vgpr will start from the index of m0 + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0 + s_mov_b32 exec_lo, 0xFFFFFFFF + s_mov_b32 exec_hi, 0x00000000 +L_RESTORE_SHARED_VGPR_WAVE64_LOOP: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + s_waitcnt vmcnt(0) + v_movreld_b32 v0, v0 //v[0+m0] = v0 + s_add_u32 m0, m0, 1 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete? + + s_mov_b32 exec_hi, 0xFFFFFFFF //restore back exec_hi before restoring V0!! 
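+	// Shared-VGPR sketch (hedged): the loop above treats each gfx10 wave64
+	// shared VGPR as one 32-lane slice, hence exec_hi = 0 and a buffer
+	// stride of 32 lanes * 4 bytes = 128 per register. For a hypothetical
+	// shared_vgpr_size field of 2, the shift-by-3 above yields 2*8 = 16
+	// shared VGPRs, i.e. 16 extra loop passes appended after the normal
+	// VGPR range already counted in m0.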
+ + /* VGPR restore on v0 */ +L_RESTORE_V0: + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3 + + /* restore SGPRs */ + //will be 2+8+16*6 + // SGPR SR memory offset : size(VGPR)+size(SVGPR) +L_RESTORE_SGPR: + get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) + get_svgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 20*4 //s108~s127 is not saved + + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + s_mov_b32 m0, s_sgpr_save_num + + read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) + s_waitcnt lgkmcnt(0) + + s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104] + s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + + read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) + s_waitcnt lgkmcnt(0) + + s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96] + s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + s_movreld_b64 s4, s4 + s_movreld_b64 s6, s6 + + L_RESTORE_SGPR_LOOP: + read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) + s_waitcnt lgkmcnt(0) + + s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0] + s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + s_movreld_b64 s4, s4 + s_movreld_b64 s6, s6 + s_movreld_b64 s8, s8 + s_movreld_b64 s10, s10 + s_movreld_b64 s12, s12 + s_movreld_b64 s14, s14 + + s_cmp_eq_u32 m0, 0 //scc = (m0 < s_sgpr_save_num) ? 
1 : 0 + s_cbranch_scc0 L_RESTORE_SGPR_LOOP + + /* restore HW registers */ +L_RESTORE_HWREG: + // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR) + get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size) + get_svgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes() + + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + + read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) + s_waitcnt lgkmcnt(0) + + s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_LO), s_restore_flat_scratch + + read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset) + s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS + + s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch + + s_mov_b32 s_restore_tmp, s_restore_pc_hi + s_and_b32 s_restore_pc_hi, s_restore_tmp, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS + + s_mov_b32 m0, s_restore_m0 + s_mov_b32 exec_lo, s_restore_exec_lo + s_mov_b32 exec_hi, s_restore_exec_hi + + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0 - s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask //restore xnack_mask - s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts - s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT + s_setreg_b32 hwreg(HW_REG_SHADER_XNACK_MASK), s_restore_xnack_mask + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0 - //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore - s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode - //reuse s_restore_m0 as a temp register - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT - s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK - s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT - s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT - s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 - s_and_b32 s_restore_m0, 
s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
-	s_lshr_b32	s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
-	s_setreg_b32	hwreg(HW_REG_IB_STS), s_restore_tmp
-	s_setreg_b32	hwreg(HW_REG_STATUS), s_restore_status
-
-	s_barrier	//barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG	//FIXME not performance-optimal at this time
-
-
-//	s_rfe_b64 s_restore_pc_lo	//Return to the main shader program and resume execution
-	s_rfe_b64 s_restore_pc_lo	// s_restore_m0[0] is used to set STATUS.inst_atc
-
-
-/**************************************************************************/
-/*			the END					*/
-/**************************************************************************/
-L_END_PGM:
+	s_setreg_b32	hwreg(HW_REG_MODE), s_restore_mode
+	s_and_b32	s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_RCNT_MASK
+	s_lshr_b32	s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
+	s_lshl_b32	s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
+	s_mov_b32	s_restore_mode, 0x0
+	s_or_b32	s_restore_mode, s_restore_mode, s_restore_m0
+	s_and_b32	s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_FIRST_REPLAY_MASK
+	s_lshr_b32	s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+	s_lshl_b32	s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
+	s_or_b32	s_restore_mode, s_restore_mode, s_restore_m0
+	s_and_b32	s_restore_m0, s_restore_tmp, S_SAVE_PC_HI_REPLAY_W64H_MASK
+	s_lshr_b32	s_restore_m0, s_restore_m0, S_SAVE_PC_HI_REPLAY_W64H_SHIFT
+	s_lshl_b32	s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_REPLAY_W64H_SHIFT
+	s_or_b32	s_restore_mode, s_restore_mode, s_restore_m0
+
+	s_and_b32	s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
+	s_lshr_b32	s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
+	s_setreg_b32	hwreg(HW_REG_IB_STS), s_restore_mode
+
+	s_and_b64	exec, exec, exec	// Restore STATUS.EXECZ, not writable by s_setreg_b32
+	s_and_b64	vcc, vcc, vcc	// Restore STATUS.VCCZ, not writable by s_setreg_b32
+	s_setreg_b32	hwreg(HW_REG_STATUS), s_restore_status	// SCC is included; it was changed by the preceding SALU instructions
+
+	s_barrier	//barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG
+
+	s_rfe_b64	s_restore_pc_lo	//Return to the main shader program and resume execution
+
+L_END_PGM:
	s_endpgm
-
-end
+end
+
+function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+	s_mov_b32	exec_lo, m0
+	s_mov_b32	m0, s_mem_offset
+	s_buffer_store_dword	s, s_rsrc, m0 glc:1
+	s_add_u32	s_mem_offset, s_mem_offset, 4
+	s_mov_b32	m0, exec_lo
+end
+
+
+function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+	s_buffer_store_dwordx4	s[0], s_rsrc, 0 glc:1
+	s_buffer_store_dwordx4	s[4], s_rsrc, 16 glc:1
+	s_buffer_store_dwordx4	s[8], s_rsrc, 32 glc:1
+	s_buffer_store_dwordx4	s[12], s_rsrc, 48 glc:1
+	s_add_u32	s_rsrc[0], s_rsrc[0], 4*16
+	s_addc_u32	s_rsrc[1], s_rsrc[1], 0x0
+end
+
+function write_12sgpr_to_mem(s, s_rsrc, s_mem_offset)
+	s_buffer_store_dwordx4	s[0], s_rsrc, 0 glc:1
+	s_buffer_store_dwordx4	s[4], s_rsrc, 16 glc:1
+	s_buffer_store_dwordx4	s[8], s_rsrc, 32 glc:1
+	s_add_u32	s_rsrc[0], s_rsrc[0], 4*12
+	s_addc_u32	s_rsrc[1], s_rsrc[1], 0x0
+end
+
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+	s_buffer_load_dword	s, s_rsrc, s_mem_offset glc:1
+	s_add_u32	s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+	s_sub_u32	s_mem_offset, s_mem_offset, 4*16
+	s_buffer_load_dwordx16	s, s_rsrc, s_mem_offset glc:1
+end
+
+function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset)
+	s_sub_u32
s_mem_offset, s_mem_offset, 4*8 + s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset glc:1 +end + +function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset) + s_sub_u32 s_mem_offset, s_mem_offset, 4*4 + s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset glc:1 +end -/**************************************************************************/ -/* the helper functions */ -/**************************************************************************/ -function write_sgpr_to_mem_wave32(s, s_rsrc, s_mem_offset, use_sqc, use_mtbuf) - if (use_sqc) - s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on - s_mov_b32 m0, s_mem_offset - s_buffer_store_dword s, s_rsrc, m0 glc:1 - s_add_u32 s_mem_offset, s_mem_offset, 4 - s_mov_b32 m0, exec_lo - elsif (use_mtbuf) - v_mov_b32 v0, s - tbuffer_store_format_x v0, v0, s_rsrc, s_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - s_add_u32 s_mem_offset, s_mem_offset, 128 - else - v_mov_b32 v0, s - buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1 - s_add_u32 s_mem_offset, s_mem_offset, 128 - end +function get_lds_size_bytes(s_lds_size_byte) + s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) + s_lshl_b32 s_lds_size_byte, s_lds_size_byte, 8 //LDS size in dwords = lds_size * 64 *4Bytes // granularity 64DW end -function write_sgpr_to_mem_wave64(s, s_rsrc, s_mem_offset, use_sqc, use_mtbuf) - if (use_sqc) - s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on - s_mov_b32 m0, s_mem_offset - s_buffer_store_dword s, s_rsrc, m0 glc:1 - s_add_u32 s_mem_offset, s_mem_offset, 4 - s_mov_b32 m0, exec_lo - elsif (use_mtbuf) - v_mov_b32 v0, s - tbuffer_store_format_x v0, v0, s_rsrc, s_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - s_add_u32 s_mem_offset, s_mem_offset, 256 - else - v_mov_b32 v0, s - buffer_store_dword v0, v0, s_rsrc, s_mem_offset slc:1 glc:1 - s_add_u32 s_mem_offset, s_mem_offset, 256 - end +function get_vgpr_size_bytes(s_vgpr_size_byte, s_size) + s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) + s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 + s_lshr_b32 m0, s_size, S_WAVE_SIZE + s_and_b32 m0, m0, 1 + s_cmp_eq_u32 m0, 1 + s_cbranch_scc1 L_ENABLE_SHIFT_W64 + s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //Number of VGPRs = (vgpr_size + 1) * 4 * 32 * 4 (non-zero value) + s_branch L_SHIFT_DONE +L_ENABLE_SHIFT_W64: + s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) +L_SHIFT_DONE: end -function read_sgpr_from_mem_wave32(s, s_rsrc, s_mem_offset, use_sqc) - s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1 - if (use_sqc) - s_add_u32 s_mem_offset, s_mem_offset, 4 - else - s_add_u32 s_mem_offset, s_mem_offset, 128 - end +function get_svgpr_size_bytes(s_svgpr_size_byte) + s_getreg_b32 s_svgpr_size_byte, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) + s_lshl_b32 s_svgpr_size_byte, s_svgpr_size_byte, (3+7) end -function read_sgpr_from_mem_wave64(s, s_rsrc, s_mem_offset, use_sqc) - s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1 - if (use_sqc) - s_add_u32 s_mem_offset, s_mem_offset, 4 - else - s_add_u32 s_mem_offset, s_mem_offset, 256 - end +function get_sgpr_size_bytes + return 512 end +function get_hwreg_size_bytes + return 128 +end diff -Naur 
linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm 2019-08-31 15:01:11.853736168 -0500 @@ -24,78 +24,6 @@ * PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex */ -/* HW (VI) source code for CWSR trap handler */ -/* Version 18 + multiple trap handler */ - -// this performance-optimal version was originally from Seven Xu at SRDC - -// Revison #18 --... -/* Rev History -** #1. Branch from gc dv. //gfxip/gfx8/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV) -** #4. SR Memory Layout: -** 1. VGPR-SGPR-HWREG-{LDS} -** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern.. -** #5. Update: 1. Accurate g8sr_ts_save_d timestamp -** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation) -** #7. Update: 1. don't barrier if noLDS -** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version -** 2. Fix SQ issue by s_sleep 2 -** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last -** 2. optimize s_buffer save by burst 16sgprs... -** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs. -** #11. Update 1. Add 2 more timestamp for debug version -** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance -** #13. Integ 1. Always use MUBUF for PV trap shader... -** #14. Update 1. s_buffer_store soft clause... -** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot. -** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree -** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part] -** 2. PERF - Save LDS before save VGPR to cover LDS save long latency... -** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not writable by s_setreg_b32 -** 2. FUNC - Handle non-CWSR traps -*/ - -var G8SR_WDMEM_HWREG_OFFSET = 0 -var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes - -// Keep definition same as the app shader, These 2 time stamps are part of the app shader... Should before any Save and after restore. 
- -var G8SR_DEBUG_TIMESTAMP = 0 -var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset -var s_g8sr_ts_save_s = s[34:35] // save start -var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader send SAVEWAVE msg to spi -var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI write the sr address to SQ -var s_g8sr_ts_save_d = s[40:41] // save end -var s_g8sr_ts_restore_s = s[42:43] // restore start -var s_g8sr_ts_restore_d = s[44:45] // restore end - -var G8SR_VGPR_SR_IN_DWX4 = 0 -var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes -var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 - - -/*************************************************************************/ -/* control on how to run the shader */ -/*************************************************************************/ -//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run) -var EMU_RUN_HACK = 0 -var EMU_RUN_HACK_RESTORE_NORMAL = 0 -var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 -var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 -var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK -var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK -var EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK -var SAVE_LDS = 1 -var WG_BASE_ADDR_LO = 0x9000a000 -var WG_BASE_ADDR_HI = 0x0 -var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem -var CTX_SAVE_CONTROL = 0x0 -var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL -var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run) -var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write -var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes -var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing - /**************************************************************************/ /* variables */ /**************************************************************************/ @@ -226,16 +154,7 @@ type(CS) - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore - //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC - s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC - s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. - s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE - //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE - s_branch L_SKIP_RESTORE //NOT restore, SAVE actually - else s_branch L_SKIP_RESTORE //NOT restore. 
might be a regular trap or save - end L_JUMP_TO_RESTORE: s_branch L_RESTORE //restore @@ -249,7 +168,7 @@ s_cbranch_scc1 L_SAVE //this is the operation for save // ********* Handle non-CWSR traps ******************* -if (!EMU_RUN_HACK) + /* read tba and tma for next level trap handler, ttmp4 is used as s_save_status */ s_load_dwordx4 [ttmp8,ttmp9,ttmp10, ttmp11], [tma_lo,tma_hi], 0 s_waitcnt lgkmcnt(0) @@ -268,7 +187,7 @@ s_and_b32 ttmp1, ttmp1, 0xFFFF set_status_without_spi_prio(s_save_status, ttmp2) //restore HW status(SCC) s_rfe_b64 [ttmp0, ttmp1] -end + // ********* End handling of non-CWSR traps ******************* /**************************************************************************/ @@ -276,12 +195,6 @@ /**************************************************************************/ L_SAVE: - -if G8SR_DEBUG_TIMESTAMP - s_memrealtime s_g8sr_ts_save_s - s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? -end - s_mov_b32 s_save_tmp, 0 //clear saveCtx bit s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit @@ -303,16 +216,7 @@ s_mov_b32 s_save_exec_hi, exec_hi s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive -if G8SR_DEBUG_TIMESTAMP - s_memrealtime s_g8sr_ts_sq_save_msg - s_waitcnt lgkmcnt(0) -end - - if (EMU_RUN_HACK) - - else s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC - end // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) @@ -321,36 +225,9 @@ L_SLEEP: s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 - if (EMU_RUN_HACK) - - else s_cbranch_execz L_SLEEP - end - -if G8SR_DEBUG_TIMESTAMP - s_memrealtime s_g8sr_ts_spi_wrexec - s_waitcnt lgkmcnt(0) -end /* setup Resource Contants */ - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) - //calculate wd_addr using absolute thread id - v_readlane_b32 s_save_tmp, v9, 0 - s_lshr_b32 s_save_tmp, s_save_tmp, 6 - s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE - s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO - s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI - s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL - else - end - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE)) - s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO - s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI - s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL - else - end - - s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE @@ -383,22 +260,10 @@ s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
- else s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0 - - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) - s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 - s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over - s_mov_b32 tba_lo, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_LO - s_mov_b32 tba_hi, EMU_RUN_HACK_SAVE_FIRST_TIME_TBA_HI - end - write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset) write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC @@ -440,18 +305,8 @@ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) - if (SGPR_SAVE_USE_SQC) s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes - else - s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) - end - - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0 //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0 @@ -490,30 +345,14 @@ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on s_mov_b32 exec_hi, 0xFFFFFFFF - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - // VGPR Allocated in 4-GPR granularity -if G8SR_VGPR_SR_IN_DWX4 - // the const stride for DWx4 is 4*4 bytes - s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 - s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes - - buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 - - s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 - s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes -else buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 -end @@ -549,64 +388,10 @@ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - s_mov_b32 m0, 0x0 //lds_offset initial value = 0 -var LDS_DMA_ENABLE = 0 -var UNROLL = 0 -if UNROLL==0 && LDS_DMA_ENABLE==1 - s_mov_b32 s3, 256*2 - s_nop 0 - s_nop 0 - s_nop 0 - L_SAVE_LDS_LOOP: - //TODO: looks the 2 buffer_store/load clause for s/r will hurt performance.??? 
- if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity - buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW - buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW - end - - s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes - s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes - s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete? - -elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROOL , has ichace miss - // store from higest LDS address to lowest - s_mov_b32 s3, 256*2 - s_sub_u32 m0, s_save_alloc_size, s3 - s_add_u32 s_save_mem_offset, s_save_mem_offset, m0 - s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128 trunks... - s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from higheset addr to lowest - s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block cost 6*4 Bytes instruction - s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //2is the below 2 inst...//s_addc and s_setpc - s_nop 0 - s_nop 0 - s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes - s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved - s_add_u32 s0, s0,s_save_alloc_size - s_addc_u32 s1, s1, 0 - s_setpc_b64 s[0:1] - - - for var i =0; i< 128; i++ - // be careful to make here a 64Byte aligned address, which could improve performance... - buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW - buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW - - if i!=127 - s_sub_u32 m0, m0, s3 // use a sgpr to shrink 2DW-inst to 1DW inst to improve performance , i.e. pack more LDS_DMA inst to one Cacheline - s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3 - end - end - -else // BUFFER_STORE v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0 v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid v_mul_i32_i24 v2, v3, 8 // tid*8 @@ -628,8 +413,6 @@ // restore rsrc3 s_mov_b32 s_save_buf_rsrc3, s0 -end - L_SAVE_LDS_DONE: @@ -647,44 +430,8 @@ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
- else s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - - // VGPR Allocated in 4-GPR granularity -if G8SR_VGPR_SR_IN_DWX4 - // the const stride for DWx4 is 4*4 bytes - s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 - s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes - - s_mov_b32 m0, 4 // skip first 4 VGPRs - s_cmp_lt_u32 m0, s_save_alloc_size - s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs - - s_set_gpr_idx_on m0, 0x1 // This will change M0 - s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0 -L_SAVE_VGPR_LOOP: - v_mov_b32 v0, v0 // v0 = v[0+m0] - v_mov_b32 v1, v1 - v_mov_b32 v2, v2 - v_mov_b32 v3, v3 - - - buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 - s_add_u32 m0, m0, 4 - s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 - s_cmp_lt_u32 m0, s_save_alloc_size - s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? - s_set_gpr_idx_off -L_SAVE_VGPR_LOOP_END: - - s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 - s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes -else // VGPR store using dw burst s_mov_b32 m0, 0x4 //VGPR initial index value =0 s_cmp_lt_u32 m0, s_save_alloc_size @@ -700,52 +447,18 @@ v_mov_b32 v2, v2 //v0 = v[0+m0] v_mov_b32 v3, v3 //v0 = v[0+m0] - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 - end s_add_u32 m0, m0, 4 //next vgpr index s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? s_set_gpr_idx_off -end L_SAVE_VGPR_END: - - - - - - - /* S_PGM_END_SAVED */ //FIXME graphics ONLY - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT)) - s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] - s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 - s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over - s_rfe_b64 s_save_pc_lo //Return to the main shader program - else - end - -// Save Done timestamp -if G8SR_DEBUG_TIMESTAMP - s_memrealtime s_g8sr_ts_save_d - // SGPR SR memory offset : size(VGPR) - get_vgpr_size_bytes(s_save_mem_offset) - s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET - s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? - // Need reset rsrc2?? 
- s_mov_b32 m0, s_save_mem_offset - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1 -end - - s_branch L_END_PGM @@ -756,27 +469,6 @@ L_RESTORE: /* Setup Resource Contants */ - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) - //calculate wd_addr using absolute thread id - v_readlane_b32 s_restore_tmp, v9, 0 - s_lshr_b32 s_restore_tmp, s_restore_tmp, 6 - s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE - s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO - s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI - s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL - else - end - -if G8SR_DEBUG_TIMESTAMP - s_memrealtime s_g8sr_ts_restore_s - s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? - // tma_lo/hi are sgpr 110, 111, which will not used for 112 SGPR allocated case... - s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0] - s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1] //backup ts to ttmp0/1, sicne exec will be finally restored.. -end - - - s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE @@ -818,18 +510,12 @@ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow??? - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end s_mov_b32 m0, 0x0 //lds_offset initial value = 0 L_RESTORE_LDS_LOOP: - if (SAVE_LDS) buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW - end s_add_u32 m0, m0, 256*2 // 128 DW s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 @@ -848,40 +534,8 @@ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end -if G8SR_VGPR_SR_IN_DWX4 - get_vgpr_size_bytes(s_restore_mem_offset) - s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 - - // the const stride for DWx4 is 4*4 bytes - s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 - s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes - - s_mov_b32 m0, s_restore_alloc_size - s_set_gpr_idx_on m0, 0x8 // Note.. 
This will change m0 - -L_RESTORE_VGPR_LOOP: - buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 - s_waitcnt vmcnt(0) - s_sub_u32 m0, m0, 4 - v_mov_b32 v0, v0 // v[0+m0] = v0 - v_mov_b32 v1, v1 - v_mov_b32 v2, v2 - v_mov_b32 v3, v3 - s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 - s_cmp_eq_u32 m0, 0x8000 - s_cbranch_scc0 L_RESTORE_VGPR_LOOP - s_set_gpr_idx_off - - s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 - s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE // const stride to 4*4 bytes - -else // VGPR load using dw burst s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 @@ -890,14 +544,10 @@ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later L_RESTORE_VGPR_LOOP: - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3 - end s_waitcnt vmcnt(0) //ensure data ready v_mov_b32 v0, v0 //v[0+m0] = v0 v_mov_b32 v1, v1 @@ -909,16 +559,10 @@ s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete? s_set_gpr_idx_off /* VGPR restore on v0 */ - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3 - end - -end /* restore SGPRs */ ////////////////////////////// @@ -934,16 +578,8 @@ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) - if (SGPR_SAVE_USE_SQC) s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes - else - s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) - end - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
- else s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end /* If 112 SGPRs ar allocated, 4 sgprs are not used TBA(108,109),TMA(110,111), However, we are safe to restore these 4 SGPRs anyway, since TBA,TMA will later be restored by HWREG @@ -972,12 +608,6 @@ ////////////////////////////// L_RESTORE_HWREG: - -if G8SR_DEBUG_TIMESTAMP - s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo - s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi -end - // HWREG SR memory offset : size(VGPR)+size(SGPR) get_vgpr_size_bytes(s_restore_mem_offset) get_sgpr_size_bytes(s_restore_tmp) @@ -985,11 +615,7 @@ s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0 read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC @@ -1006,16 +632,6 @@ s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS - //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) - s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) - s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over - end - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL)) - s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal - s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over - end - s_mov_b32 m0, s_restore_m0 s_mov_b32 exec_lo, s_restore_exec_lo s_mov_b32 exec_hi, s_restore_exec_hi @@ -1048,11 +664,6 @@ s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time -if G8SR_DEBUG_TIMESTAMP - s_memrealtime s_g8sr_ts_restore_d - s_waitcnt lgkmcnt(0) -end - // s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm 2019-08-31 15:01:11.853736168 -0500 @@ -24,76 +24,9 @@ * PROJECT=greenland ./sp3 cwsr_trap_handler_gfx9.asm -hex tmp.hex */ -/* HW (GFX9) source code for CWSR trap handler */ -/* Version 18 + multiple trap handler */ - -// this performance-optimal version was originally from Seven Xu at SRDC - -// Revison #18 --... -/* Rev History -** #1. Branch from gc dv. //gfxip/gfx9/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV) -** #4. SR Memory Layout: -** 1. VGPR-SGPR-HWREG-{LDS} -** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern.. -** #5. Update: 1. Accurate g8sr_ts_save_d timestamp -** #6. Update: 1. Fix s_barrier usage; 2. 
VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation) -** #7. Update: 1. don't barrier if noLDS -** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version -** 2. Fix SQ issue by s_sleep 2 -** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last -** 2. optimize s_buffer save by burst 16sgprs... -** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs. -** #11. Update 1. Add 2 more timestamp for debug version -** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance -** #13. Integ 1. Always use MUBUF for PV trap shader... -** #14. Update 1. s_buffer_store soft clause... -** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot. -** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree -** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part] -** 2. PERF - Save LDS before save VGPR to cover LDS save long latency... -** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not writable by s_setreg_b32 -** 2. FUNC - Handle non-CWSR traps -*/ - -var G8SR_WDMEM_HWREG_OFFSET = 0 -var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes - -// Keep definition same as the app shader, These 2 time stamps are part of the app shader... Should before any Save and after restore. - -var G8SR_DEBUG_TIMESTAMP = 0 -var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset -var s_g8sr_ts_save_s = s[34:35] // save start -var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader send SAVEWAVE msg to spi -var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI write the sr address to SQ -var s_g8sr_ts_save_d = s[40:41] // save end -var s_g8sr_ts_restore_s = s[42:43] // restore start -var s_g8sr_ts_restore_d = s[44:45] // restore end - -var G8SR_VGPR_SR_IN_DWX4 = 0 -var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes -var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 - - -/*************************************************************************/ -/* control on how to run the shader */ -/*************************************************************************/ -//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run) -var EMU_RUN_HACK = 0 -var EMU_RUN_HACK_RESTORE_NORMAL = 0 -var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 -var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 -var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK -var SAVE_LDS = 1 -var WG_BASE_ADDR_LO = 0x9000a000 -var WG_BASE_ADDR_HI = 0x0 -var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem -var CTX_SAVE_CONTROL = 0x0 -var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL -var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run) -var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write -var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes -var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency +var SAVE_AFTER_XNACK_ERROR = 1 //workaround for TCP store failure after XNACK 
error when ALLOW_REPLAY=0, for debugger +var SINGLE_STEP_MISSED_WORKAROUND = 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised /**************************************************************************/ /* variables */ @@ -107,6 +40,7 @@ var SQ_WAVE_STATUS_PRE_SPI_PRIO_SIZE = 1 var SQ_WAVE_STATUS_POST_SPI_PRIO_SHIFT = 3 var SQ_WAVE_STATUS_POST_SPI_PRIO_SIZE = 29 +var SQ_WAVE_STATUS_ALLOW_REPLAY_MASK = 0x400000 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 @@ -127,12 +61,15 @@ var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 +var SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK = 0x10000000 var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000 var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME +var SQ_WAVE_MODE_DEBUG_EN_MASK = 0x800 + var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 @@ -197,13 +134,15 @@ var s_restore_spi_init_hi = exec_hi var s_restore_mem_offset = ttmp12 +var s_restore_accvgpr_offset = ttmp13 var s_restore_alloc_size = ttmp3 var s_restore_tmp = ttmp2 var s_restore_mem_offset_save = s_restore_tmp //no conflict +var s_restore_accvgpr_offset_save = ttmp7 var s_restore_m0 = s_restore_alloc_size //no conflict -var s_restore_mode = ttmp7 +var s_restore_mode = s_restore_accvgpr_offset_save var s_restore_pc_lo = ttmp0 var s_restore_pc_hi = ttmp1 @@ -226,20 +165,11 @@ /* Shader Main*/ shader main - asic(GFX9) + asic(DEFAULT) type(CS) - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore - //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC - s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC - s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. - s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE - //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE - s_branch L_SKIP_RESTORE //NOT restore, SAVE actually - else s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save - end L_JUMP_TO_RESTORE: s_branch L_RESTORE //restore @@ -248,12 +178,29 @@ s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save + +if SINGLE_STEP_MISSED_WORKAROUND + // No single step exceptions if MODE.DEBUG_EN=0. + s_getreg_b32 ttmp2, hwreg(HW_REG_MODE) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK + s_cbranch_scc0 L_NO_SINGLE_STEP_WORKAROUND + + // Second-level trap already handled exception if STATUS.HALT=1. + s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK + + // Prioritize single step exception over context save. + // Second-level trap will halt wave and RFE, re-entering for SAVECTX. + s_cbranch_scc0 L_FETCH_2ND_TRAP + +L_NO_SINGLE_STEP_WORKAROUND: +end + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save s_cbranch_scc1 L_SAVE //this is the operation for save // ********* Handle non-CWSR traps ******************* -if (!EMU_RUN_HACK) + // Illegal instruction is a non-maskable exception which blocks context save. 
// Halt the wavefront and return from the trap. s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK @@ -330,7 +277,7 @@ set_status_without_spi_prio(s_save_status, ttmp2) s_rfe_b64 [ttmp0, ttmp1] -end + // ********* End handling of non-CWSR traps ******************* /**************************************************************************/ @@ -338,12 +285,6 @@ /**************************************************************************/ L_SAVE: - -if G8SR_DEBUG_TIMESTAMP - s_memrealtime s_g8sr_ts_save_s - s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? -end - s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] s_mov_b32 s_save_tmp, 0 //clear saveCtx bit @@ -365,16 +306,7 @@ s_mov_b32 s_save_exec_hi, exec_hi s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive -if G8SR_DEBUG_TIMESTAMP - s_memrealtime s_g8sr_ts_sq_save_msg - s_waitcnt lgkmcnt(0) -end - - if (EMU_RUN_HACK) - - else s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC - end // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) @@ -383,33 +315,7 @@ L_SLEEP: s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 - if (EMU_RUN_HACK) - - else s_cbranch_execz L_SLEEP - end - -if G8SR_DEBUG_TIMESTAMP - s_memrealtime s_g8sr_ts_spi_wrexec - s_waitcnt lgkmcnt(0) -end - - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) - //calculate wd_addr using absolute thread id - v_readlane_b32 s_save_tmp, v9, 0 - s_lshr_b32 s_save_tmp, s_save_tmp, 6 - s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE - s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO - s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI - s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL - else - end - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE)) - s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO - s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI - s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL - else - end // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 @@ -459,20 +365,10 @@ s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
- else s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0 - - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) - s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 - s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over - end - write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset) write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC @@ -510,17 +406,9 @@ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) - if (SGPR_SAVE_USE_SQC) s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes - else - s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) - end - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0 @@ -563,30 +451,25 @@ s_mov_b32 xnack_mask_lo, 0x0 s_mov_b32 xnack_mask_hi, 0x0 - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end // VGPR Allocated in 4-GPR granularity -if G8SR_VGPR_SR_IN_DWX4 - // the const stride for DWx4 is 4*4 bytes - s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 - s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes - - buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 - - s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 - s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes -else +if SAVE_AFTER_XNACK_ERROR + check_if_tcp_store_ok() + s_cbranch_scc1 L_SAVE_FIRST_VGPRS_WITH_TCP + + write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset) + s_branch L_SAVE_LDS + +L_SAVE_FIRST_VGPRS_WITH_TCP: +end + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 -end @@ -621,66 +504,34 @@ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end s_mov_b32 m0, 0x0 //lds_offset initial value = 0 -var LDS_DMA_ENABLE = 0 -var UNROLL = 0 -if UNROLL==0 && LDS_DMA_ENABLE==1 - s_mov_b32 s3, 256*2 - s_nop 0 - s_nop 0 - s_nop 0 - L_SAVE_LDS_LOOP: - //TODO: looks the 2 buffer_store/load clause for s/r will hurt performance.??? 
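// A worked sketch of the SAVE_AFTER_XNACK_ERROR fallback that follows, with
// the 64-lane wave size assumed: v3 holds the thread id produced by the
// v_mbcnt pair, v2 = tid*4 addresses one DWord per lane, and ds_read2_b32
// with offset1:0x40 (64 DWords) pulls two 256-byte rows of LDS into v[0:1].
// write_vgprs_to_mem_with_sqc then drains those two VGPRs through scalar
// stores, so each trip covers 2 * 64 * 4 = 512 bytes and v2 advances by
// 0x200 to match.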
- if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity - buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW - buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW - end - - s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes - s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes - s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 - s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete? - -elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROOL , has ichace miss - // store from higest LDS address to lowest - s_mov_b32 s3, 256*2 - s_sub_u32 m0, s_save_alloc_size, s3 - s_add_u32 s_save_mem_offset, s_save_mem_offset, m0 - s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128 trunks... - s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from higheset addr to lowest - s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block cost 6*4 Bytes instruction - s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //2is the below 2 inst...//s_addc and s_setpc - s_nop 0 - s_nop 0 - s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes - s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved - s_add_u32 s0, s0,s_save_alloc_size - s_addc_u32 s1, s1, 0 - s_setpc_b64 s[0:1] - - - for var i =0; i< 128; i++ - // be careful to make here a 64Byte aligned address, which could improve performance... - buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW - buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW - - if i!=127 - s_sub_u32 m0, m0, s3 // use a sgpr to shrink 2DW-inst to 1DW inst to improve performance , i.e. pack more LDS_DMA inst to one Cacheline - s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3 - end - end - -else // BUFFER_STORE v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0 v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid + +if SAVE_AFTER_XNACK_ERROR + check_if_tcp_store_ok() + s_cbranch_scc1 L_SAVE_LDS_WITH_TCP + + v_lshlrev_b32 v2, 2, v3 +L_SAVE_LDS_LOOP_SQC: + ds_read2_b32 v[0:1], v2 offset0:0 offset1:0x40 + s_waitcnt lgkmcnt(0) + + write_vgprs_to_mem_with_sqc(v0, 2, s_save_buf_rsrc0, s_save_mem_offset) + + v_add_u32 v2, 0x200, v2 + v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size + s_cbranch_vccnz L_SAVE_LDS_LOOP_SQC + + s_branch L_SAVE_LDS_DONE + +L_SAVE_LDS_WITH_TCP: +end + v_mul_i32_i24 v2, v3, 8 // tid*8 v_mov_b32 v3, 256*2 s_mov_b32 m0, 0x10000 @@ -701,8 +552,6 @@ // restore rsrc3 s_mov_b32 s_save_buf_rsrc3, s0 -end - L_SAVE_LDS_DONE: @@ -720,44 +569,9 @@ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) - if (SWIZZLE_EN) - s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
- else s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end - - - // VGPR Allocated in 4-GPR granularity -if G8SR_VGPR_SR_IN_DWX4 - // the const stride for DWx4 is 4*4 bytes - s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 - s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes - - s_mov_b32 m0, 4 // skip first 4 VGPRs - s_cmp_lt_u32 m0, s_save_alloc_size - s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs - - s_set_gpr_idx_on m0, 0x1 // This will change M0 - s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0 -L_SAVE_VGPR_LOOP: - v_mov_b32 v0, v0 // v0 = v[0+m0] - v_mov_b32 v1, v1 - v_mov_b32 v2, v2 - v_mov_b32 v3, v3 - - - buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 - s_add_u32 m0, m0, 4 - s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 - s_cmp_lt_u32 m0, s_save_alloc_size - s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? - s_set_gpr_idx_off -L_SAVE_VGPR_LOOP_END: - s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 - s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes -else // VGPR store using dw burst s_mov_b32 m0, 0x4 //VGPR initial index value =0 s_cmp_lt_u32 m0, s_save_alloc_size @@ -767,57 +581,82 @@ s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later +if SAVE_AFTER_XNACK_ERROR + check_if_tcp_store_ok() + s_cbranch_scc1 L_SAVE_VGPR_LOOP + +L_SAVE_VGPR_LOOP_SQC: + write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset) + + s_add_u32 m0, m0, 4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc1 L_SAVE_VGPR_LOOP_SQC + + s_set_gpr_idx_off + s_branch L_SAVE_VGPR_END +end + L_SAVE_VGPR_LOOP: v_mov_b32 v0, v0 //v0 = v[0+m0] v_mov_b32 v1, v1 //v0 = v[0+m0] v_mov_b32 v2, v2 //v0 = v[0+m0] v_mov_b32 v3, v3 //v0 = v[0+m0] - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 - end s_add_u32 m0, m0, 4 //next vgpr index s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? 
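// Sketch of the index arithmetic in the loop above, assuming 64 VGPRs are
// allocated (s_save_alloc_size = 64 before the bias): with s_set_gpr_idx_on
// active, M0[15:12] = 0x1, so both m0 and the limit carry the 0x1000 bias
// and s_cmp_lt_u32 still compares plain register indices. Each trip stores
// a 4-register burst of 4 * 64 lanes * 4 bytes = 1 KiB, matching the 256*4
// advance of s_save_mem_offset; v4..v63 take (64-4)/4 = 15 trips after the
// first four VGPRs were saved up front.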
s_set_gpr_idx_off -end L_SAVE_VGPR_END: +if ASIC_TARGET_ARCTURUS + // Save ACC VGPRs + s_mov_b32 m0, 0x0 //VGPR initial index value =0 + s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 +if SAVE_AFTER_XNACK_ERROR + check_if_tcp_store_ok() + s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP +L_SAVE_ACCVGPR_LOOP_SQC: + for var vgpr = 0; vgpr < 4; ++ vgpr + v_accvgpr_read v[vgpr], acc[vgpr] // v[N] = acc[N+m0] + end + + write_vgprs_to_mem_with_sqc(v0, 4, s_save_buf_rsrc0, s_save_mem_offset) + s_add_u32 m0, m0, 4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP_SQC + s_set_gpr_idx_off + s_branch L_SAVE_ACCVGPR_END +end - /* S_PGM_END_SAVED */ //FIXME graphics ONLY - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT)) - s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] - s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 - s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over - s_rfe_b64 s_save_pc_lo //Return to the main shader program - else +L_SAVE_ACCVGPR_LOOP: + for var vgpr = 0; vgpr < 4; ++ vgpr + v_accvgpr_read v[vgpr], acc[vgpr] // v[N] = acc[N+m0] end -// Save Done timestamp -if G8SR_DEBUG_TIMESTAMP - s_memrealtime s_g8sr_ts_save_d - // SGPR SR memory offset : size(VGPR) - get_vgpr_size_bytes(s_save_mem_offset) - s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET - s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? - // Need reset rsrc2?? - s_mov_b32 m0, s_save_mem_offset - s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1 -end + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 + + s_add_u32 m0, m0, 4 + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc1 L_SAVE_ACCVGPR_LOOP + s_set_gpr_idx_off +L_SAVE_ACCVGPR_END: +end s_branch L_END_PGM @@ -829,27 +668,6 @@ L_RESTORE: /* Setup Resource Contants */ - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) - //calculate wd_addr using absolute thread id - v_readlane_b32 s_restore_tmp, v9, 0 - s_lshr_b32 s_restore_tmp, s_restore_tmp, 6 - s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE - s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO - s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI - s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL - else - end - -if G8SR_DEBUG_TIMESTAMP - s_memrealtime s_g8sr_ts_restore_s - s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? - // tma_lo/hi are sgpr 110, 111, which will not used for 112 SGPR allocated case... - s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0] - s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1] //backup ts to ttmp0/1, sicne exec will be finally restored.. -end - - - s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE @@ -891,18 +709,12 @@ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow??? 
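// The restore offsets replay the save-area layout; a sketch assuming a
// 128-VGPR, 96-SGPR wave on a 64-lane SIMD: the VGPR block is
// 128 * 64 * 4 = 32 KiB, the SGPR block 96 * 4 = 384 bytes, and the LDS
// image begins after the additional get_hwreg_size_bytes() header saved
// between them. On Arcturus (ASIC_TARGET_ARCTURUS) the patch doubles
// get_vgpr_size_bytes(), so the ACC VGPR copies stored after the regular
// VGPRs are stepped over by the same computation.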
- if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end s_mov_b32 m0, 0x0 //lds_offset initial value = 0 L_RESTORE_LDS_LOOP: - if (SAVE_LDS) buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW - end s_add_u32 m0, m0, 256*2 // 128 DW s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 @@ -921,56 +733,43 @@ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else - s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end -if G8SR_VGPR_SR_IN_DWX4 - get_vgpr_size_bytes(s_restore_mem_offset) - s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 - - // the const stride for DWx4 is 4*4 bytes - s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 - s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes - - s_mov_b32 m0, s_restore_alloc_size - s_set_gpr_idx_on m0, 0x8 // Note.. This will change m0 - -L_RESTORE_VGPR_LOOP: - buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 - s_waitcnt vmcnt(0) - s_sub_u32 m0, m0, 4 - v_mov_b32 v0, v0 // v[0+m0] = v0 - v_mov_b32 v1, v1 - v_mov_b32 v2, v2 - v_mov_b32 v3, v3 - s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 - s_cmp_eq_u32 m0, 0x8000 - s_cbranch_scc0 L_RESTORE_VGPR_LOOP - s_set_gpr_idx_off +if ASIC_TARGET_ARCTURUS + s_mov_b32 s_restore_accvgpr_offset, s_restore_buf_rsrc2 //ACC VGPRs at end of VGPRs +end - s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 - s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE // const stride to 4*4 bytes + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes -else // VGPR load using dw burst s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 +if ASIC_TARGET_ARCTURUS + s_mov_b32 s_restore_accvgpr_offset_save, s_restore_accvgpr_offset + s_add_u32 s_restore_accvgpr_offset, s_restore_accvgpr_offset, 256*4 +end s_mov_b32 m0, 4 //VGPR initial index value = 1 s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later L_RESTORE_VGPR_LOOP: - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else + +if ASIC_TARGET_ARCTURUS + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 offset:256*2 + 
buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset slc:1 glc:1 offset:256*3 + s_add_u32 s_restore_accvgpr_offset, s_restore_accvgpr_offset, 256*4 + s_waitcnt vmcnt(0) + + for var vgpr = 0; vgpr < 4; ++ vgpr + v_accvgpr_write acc[vgpr], v[vgpr] + end +end + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3 - end s_waitcnt vmcnt(0) //ensure data ready v_mov_b32 v0, v0 //v[0+m0] = v0 v_mov_b32 v1, v1 @@ -982,16 +781,22 @@ s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete? s_set_gpr_idx_off /* VGPR restore on v0 */ - if(USE_MTBUF_INSTEAD_OF_MUBUF) - tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 - else +if ASIC_TARGET_ARCTURUS + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_accvgpr_offset_save slc:1 glc:1 offset:256*3 + s_waitcnt vmcnt(0) + + for var vgpr = 0; vgpr < 4; ++ vgpr + v_accvgpr_write acc[vgpr], v[vgpr] + end +end + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256 buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2 buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3 - end - -end /* restore SGPRs */ ////////////////////////////// @@ -1007,16 +812,8 @@ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) - if (SGPR_SAVE_USE_SQC) s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes - else - s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) - end - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? - else s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end s_mov_b32 m0, s_restore_alloc_size @@ -1044,11 +841,6 @@ L_RESTORE_HWREG: -if G8SR_DEBUG_TIMESTAMP - s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo - s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi -end - // HWREG SR memory offset : size(VGPR)+size(SGPR) get_vgpr_size_bytes(s_restore_mem_offset) get_sgpr_size_bytes(s_restore_tmp) @@ -1056,11 +848,7 @@ s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes - if (SWIZZLE_EN) - s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? 
- else s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes - end read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0 read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC @@ -1075,16 +863,6 @@ s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS - //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: - if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) - s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) - s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over - end - if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL)) - s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal - s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over - end - s_mov_b32 m0, s_restore_m0 s_mov_b32 exec_lo, s_restore_exec_lo s_mov_b32 exec_hi, s_restore_exec_hi @@ -1131,11 +909,6 @@ s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time -if G8SR_DEBUG_TIMESTAMP - s_memrealtime s_g8sr_ts_restore_d - s_waitcnt lgkmcnt(0) -end - // s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc @@ -1190,7 +963,39 @@ s_sub_u32 s_mem_offset, s_mem_offset, 4*16 end +function check_if_tcp_store_ok + // If STATUS.ALLOW_REPLAY=0 and TRAPSTS.XNACK_ERROR=1 then TCP stores will fail. + s_and_b32 s_save_tmp, s_save_status, SQ_WAVE_STATUS_ALLOW_REPLAY_MASK + s_cbranch_scc1 L_TCP_STORE_CHECK_DONE + + s_getreg_b32 s_save_tmp, hwreg(HW_REG_TRAPSTS) + s_andn2_b32 s_save_tmp, SQ_WAVE_TRAPSTS_XNACK_ERROR_MASK, s_save_tmp + +L_TCP_STORE_CHECK_DONE: +end + +function write_vgpr_to_mem_with_sqc(v, s_rsrc, s_mem_offset) + s_mov_b32 s4, 0 +L_WRITE_VGPR_LANE_LOOP: + for var lane = 0; lane < 4; ++ lane + v_readlane_b32 s[lane], v, s4 + s_add_u32 s4, s4, 1 + end + + s_buffer_store_dwordx4 s[0:3], s_rsrc, s_mem_offset glc:1 + ack_sqc_store_workaround() + + s_add_u32 s_mem_offset, s_mem_offset, 0x10 + s_cmp_eq_u32 s4, 0x40 + s_cbranch_scc0 L_WRITE_VGPR_LANE_LOOP +end + +function write_vgprs_to_mem_with_sqc(v, n_vgprs, s_rsrc, s_mem_offset) + for var vgpr = 0; vgpr < n_vgprs; ++ vgpr + write_vgpr_to_mem_with_sqc(v[vgpr], s_rsrc, s_mem_offset) + end +end function get_lds_size_bytes(s_lds_size_byte) // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW @@ -1202,6 +1007,10 @@ s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) //FIXME for GFX, zero is possible + +if ASIC_TARGET_ARCTURUS + s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, 1 // Double size for ACC VGPRs +end end function get_sgpr_size_bytes(s_sgpr_size_byte) diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h 2019-08-31 
15:01:11.852736168 -0500 @@ -274,154 +274,227 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { - 0xbf820001, 0xbf82015e, + 0xbf820001, 0xbf820248, 0xb8f8f802, 0x89788678, - 0xb8fbf803, 0x866eff7b, - 0x00000400, 0xbf85003b, - 0x866eff7b, 0x00000800, - 0xbf850003, 0x866eff7b, - 0x00000100, 0xbf84000c, + 0xb8eef801, 0x866eff6e, + 0x00000800, 0xbf840003, 0x866eff78, 0x00002000, - 0xbf840005, 0xbf8e0010, - 0xb8eef803, 0x866eff6e, - 0x00000400, 0xbf84fffb, - 0x8778ff78, 0x00002000, - 0x80ec886c, 0x82ed806d, - 0xb8eef807, 0x866fff6e, - 0x001f8000, 0x8e6f8b6f, - 0x8977ff77, 0xfc000000, - 0x87776f77, 0x896eff6e, - 0x001f8000, 0xb96ef807, - 0xb8faf812, 0xb8fbf813, - 0x8efa887a, 0xc0071bbd, - 0x00000000, 0xbf8cc07f, - 0xc0071ebd, 0x00000008, - 0xbf8cc07f, 0x86ee6e6e, - 0xbf840001, 0xbe801d6e, - 0xb8fbf803, 0x867bff7b, - 0x000001ff, 0xbf850002, - 0x806c846c, 0x826d806d, + 0xbf840016, 0xb8fbf803, + 0x866eff7b, 0x00000400, + 0xbf85003b, 0x866eff7b, + 0x00000800, 0xbf850003, + 0x866eff7b, 0x00000100, + 0xbf84000c, 0x866eff78, + 0x00002000, 0xbf840005, + 0xbf8e0010, 0xb8eef803, + 0x866eff6e, 0x00000400, + 0xbf84fffb, 0x8778ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xb8eef807, + 0x866fff6e, 0x001f8000, + 0x8e6f8b6f, 0x8977ff77, + 0xfc000000, 0x87776f77, + 0x896eff6e, 0x001f8000, + 0xb96ef807, 0xb8faf812, + 0xb8fbf813, 0x8efa887a, + 0xc0071bbd, 0x00000000, + 0xbf8cc07f, 0xc0071ebd, + 0x00000008, 0xbf8cc07f, + 0x86ee6e6e, 0xbf840001, + 0xbe801d6e, 0xb8fbf803, + 0x867bff7b, 0x000001ff, + 0xbf850002, 0x806c846c, + 0x826d806d, 0x866dff6d, + 0x0000ffff, 0x8f6e8b77, + 0x866eff6e, 0x001f8000, + 0xb96ef807, 0x86fe7e7e, + 0x86ea6a6a, 0x8f6e8378, + 0xb96ee0c2, 0xbf800002, + 0xb9780002, 0xbe801f6c, 0x866dff6d, 0x0000ffff, - 0x8f6e8b77, 0x866eff6e, - 0x001f8000, 0xb96ef807, - 0x86fe7e7e, 0x86ea6a6a, - 0x8f6e8378, 0xb96ee0c2, - 0xbf800002, 0xb9780002, - 0xbe801f6c, 0x866dff6d, - 0x0000ffff, 0xbefa0080, - 0xb97a0283, 0xb8fa2407, - 0x8e7a9b7a, 0x876d7a6d, - 0xb8fa03c7, 0x8e7a9a7a, - 0x876d7a6d, 0xb8faf807, - 0x867aff7a, 0x00007fff, - 0xb97af807, 0xbeee007e, - 0xbeef007f, 0xbefe0180, - 0xbf900004, 0x877a8478, - 0xb97af802, 0xbf8e0002, - 0xbf88fffe, 0xb8fa2a05, - 0x807a817a, 0x8e7a8a7a, - 0xb8fb1605, 0x807b817b, - 0x8e7b867b, 0x807a7b7a, - 0x807a7e7a, 0x827b807f, - 0x867bff7b, 0x0000ffff, - 0xc04b1c3d, 0x00000050, - 0xbf8cc07f, 0xc04b1d3d, - 0x00000060, 0xbf8cc07f, - 0xc0431e7d, 0x00000074, - 0xbf8cc07f, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x867aff7f, - 0x08000000, 0x8f7a837a, - 0x87777a77, 0x867aff7f, - 0x70000000, 0x8f7a817a, - 0x87777a77, 0xbef1007c, - 0xbef00080, 0xb8f02a05, - 0x80708170, 0x8e708a70, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x80707a70, - 0xbef60084, 0xbef600ff, - 0x01000000, 0xbefe007c, - 0xbefc0070, 0xc0611c7a, - 0x0000007c, 0xbf8cc07f, - 0x80708470, 0xbefc007e, + 0xbefa0080, 0xb97a0283, + 0xb8fa2407, 0x8e7a9b7a, + 0x876d7a6d, 0xb8fa03c7, + 0x8e7a9a7a, 0x876d7a6d, + 0xb8faf807, 0x867aff7a, + 0x00007fff, 0xb97af807, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbf900004, + 0x877a8478, 0xb97af802, + 0xbf8e0002, 0xbf88fffe, + 0xb8fa2a05, 0x807a817a, + 0x8e7a8a7a, 0xb8fb1605, + 0x807b817b, 0x8e7b867b, + 0x807a7b7a, 0x807a7e7a, + 0x827b807f, 0x867bff7b, + 0x0000ffff, 0xc04b1c3d, + 0x00000050, 0xbf8cc07f, + 0xc04b1d3d, 0x00000060, + 0xbf8cc07f, 0xc0431e7d, + 0x00000074, 0xbf8cc07f, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x867aff7f, 0x08000000, + 0x8f7a837a, 0x87777a77, + 
0x867aff7f, 0x70000000, + 0x8f7a817a, 0x87777a77, + 0xbef1007c, 0xbef00080, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0xbef60084, + 0xbef600ff, 0x01000000, 0xbefe007c, 0xbefc0070, - 0xc0611b3a, 0x0000007c, + 0xc0611c7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611b7a, + 0xbefc0070, 0xc0611b3a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611bba, 0x0000007c, + 0xc0611b7a, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611bfa, + 0xbefc0070, 0xc0611bba, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611e3a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xb8fbf803, - 0xbefe007c, 0xbefc0070, - 0xc0611efa, 0x0000007c, + 0xc0611bfa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc0070, 0xc0611a3a, + 0xbefc0070, 0xc0611e3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8fbf803, 0xbefe007c, + 0xbefc0070, 0xc0611efa, 0x0000007c, 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, 0xbefc0070, - 0xc0611a7a, 0x0000007c, - 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0xb8f1f801, - 0xbefe007c, 0xbefc0070, - 0xc0611c7a, 0x0000007c, + 0xc0611a3a, 0x0000007c, 0xbf8cc07f, 0x80708470, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbeef0080, - 0x876f6f7a, 0xb8f02a05, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611a7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8f1f801, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8fb1605, + 0x807b817b, 0x8e7b847b, + 0x8e76827b, 0xbef600ff, + 0x01000000, 0xbef20174, + 0x80747074, 0x82758075, + 0xbefc0080, 0xbf800000, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003a, 0x00000000, + 0xbf8cc07f, 0xc06b013a, + 0x00000010, 0xbf8cc07f, + 0xc06b023a, 0x00000020, + 0xbf8cc07f, 0xc06b033a, + 0x00000030, 0xbf8cc07f, + 0x8074c074, 0x82758075, + 0x807c907c, 0xbf0a7b7c, + 0xbf85ffe7, 0xbef40172, + 0xbef00080, 0xbefe00c1, + 0xbeff00c1, 0xbee80080, + 0xbee90080, 0xbef600ff, + 0x01000000, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf85004d, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbf820008, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0xbefe00c1, + 0xbeff00c1, 0xb8fb4306, + 0x867bc17b, 0xbf840063, + 0xbf8a0000, 0x867aff6f, + 0x04000000, 0xbf84005f, + 
0x8e7b867b, 0x8e7b827b, + 0xbef6007b, 0xb8f02a05, 0x80708170, 0x8e708a70, - 0xb8fb1605, 0x807b817b, - 0x8e7b847b, 0x8e76827b, - 0xbef600ff, 0x01000000, - 0xbef20174, 0x80747074, - 0x82758075, 0xbefc0080, - 0xbf800000, 0xbe802b00, - 0xbe822b02, 0xbe842b04, - 0xbe862b06, 0xbe882b08, - 0xbe8a2b0a, 0xbe8c2b0c, - 0xbe8e2b0e, 0xc06b003a, - 0x00000000, 0xbf8cc07f, - 0xc06b013a, 0x00000010, - 0xbf8cc07f, 0xc06b023a, - 0x00000020, 0xbf8cc07f, - 0xc06b033a, 0x00000030, - 0xbf8cc07f, 0x8074c074, - 0x82758075, 0x807c907c, - 0xbf0a7b7c, 0xbf85ffe7, - 0xbef40172, 0xbef00080, - 0xbefe00c1, 0xbeff00c1, - 0xbee80080, 0xbee90080, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, 0xbef600ff, 0x01000000, - 0xe0724000, 0x701d0000, - 0xe0724100, 0x701d0100, - 0xe0724200, 0x701d0200, - 0xe0724300, 0x701d0300, - 0xbefe00c1, 0xbeff00c1, - 0xb8fb4306, 0x867bc17b, - 0xbf84002c, 0xbf8a0000, - 0x867aff6f, 0x04000000, - 0xbf840028, 0x8e7b867b, - 0x8e7b827b, 0xbef6007b, - 0xb8f02a05, 0x80708170, - 0x8e708a70, 0xb8fa1605, - 0x807a817a, 0x8e7a867a, - 0x80707a70, 0x8070ff70, - 0x00000080, 0xbef600ff, - 0x01000000, 0xbefc0080, - 0xd28c0002, 0x000100c1, - 0xd28d0003, 0x000204c1, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, 0xd1060002, 0x00011103, 0x7e0602ff, 0x00000200, 0xbefc00ff, 0x00010000, @@ -438,9 +511,53 @@ 0x807b817b, 0x8e7b827b, 0x8e76887b, 0xbef600ff, 0x01000000, 0xbefc0084, - 0xbf0a7b7c, 0xbf840015, + 0xbf0a7b7c, 0xbf84006d, 0xbf11017c, 0x807bff7b, - 0x00001000, 0x7e000300, + 0x00001000, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850051, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x807c847c, 0xbf0a7b7c, + 0xbf85ffb1, 0xbf9c0000, + 0xbf820012, 0x7e000300, 0x7e020301, 0x7e040302, 0x7e060303, 0xe0724000, 0x701d0000, 0xe0724100, @@ -563,24 +680,47 @@ }; static const uint32_t cwsr_trap_gfx10_hex[] = { - 
0xbf820001, 0xbf82012e, - 0xb0804004, 0xb970f802, - 0x8a708670, 0xb971f803, - 0x8771ff71, 0x00000400, - 0xbf850008, 0xb971f803, - 0x8771ff71, 0x000001ff, - 0xbf850001, 0x806c846c, + 0xbf820001, 0xbf8201c1, + 0xb0804004, 0xb978f802, + 0x8a788678, 0xb971f803, + 0x876eff71, 0x00000400, + 0xbf850033, 0x876eff71, + 0x00000100, 0xbf840002, + 0x8878ff78, 0x00002000, + 0x8a77ff77, 0xff000000, + 0xb96ef807, 0x876fff6e, + 0x02000000, 0x8f6f866f, + 0x88776f77, 0x876fff6e, + 0x003f8000, 0x8f6f896f, + 0x88776f77, 0x8a6eff6e, + 0x023f8000, 0xb9eef807, + 0xb970f812, 0xb971f813, + 0x8ff08870, 0xf4051bb8, + 0xfa000000, 0xbf8cc07f, + 0xf4051c38, 0xfa000008, + 0xbf8cc07f, 0x87ee6e6e, + 0xbf840001, 0xbe80206e, + 0xb971f803, 0x8771ff71, + 0x000001ff, 0xbf850002, + 0x806c846c, 0x826d806d, + 0x876dff6d, 0x0000ffff, + 0x906e8977, 0x876fff6e, + 0x003f8000, 0x906e8677, + 0x876eff6e, 0x02000000, + 0x886e6f6e, 0xb9eef807, + 0x87fe7e7e, 0x87ea6a6a, + 0xb9f8f802, 0xbe80226c, + 0xb971f803, 0x8771ff71, + 0x00000100, 0xbf840006, + 0xbef60380, 0xb9f60203, 0x876dff6d, 0x0000ffff, - 0xbe80226c, 0xb971f803, - 0x8771ff71, 0x00000100, - 0xbf840006, 0xbef60380, - 0xb9f60203, 0x876dff6d, - 0x0000ffff, 0x80ec886c, - 0x82ed806d, 0xbef60380, - 0xb9f60283, 0xb973f816, - 0xb9762c07, 0x8f769c76, - 0x886d766d, 0xb97603c7, - 0x8f769b76, 0x886d766d, + 0x80ec886c, 0x82ed806d, + 0xbef60380, 0xb9f60283, + 0xb972f816, 0xb9762c07, + 0x8f769a76, 0x886d766d, + 0xb97603c7, 0x8f769976, + 0x886d766d, 0xb9760647, + 0x8f769876, 0x886d766d, 0xb976f807, 0x8776ff76, 0x00007fff, 0xb9f6f807, 0xbeee037e, 0xbeef037f, @@ -589,274 +729,833 @@ 0xbef4037e, 0x8775ff7f, 0x0000ffff, 0x8875ff75, 0x00040000, 0xbef60380, - 0xbef703ff, 0x00807fac, + 0xbef703ff, 0x10807fac, 0x8776ff7f, 0x08000000, 0x90768376, 0x88777677, 0x8776ff7f, 0x70000000, 0x90768176, 0x88777677, 0xbefb037c, 0xbefa0380, - 0xb97202dc, 0x8872727f, - 0xbefe03c1, 0x877c8172, - 0xbf06817c, 0xbf850002, - 0xbeff0380, 0xbf820001, - 0xbeff03c1, 0xb9712a05, - 0x80718171, 0x8f718271, - 0x877c8172, 0xbf06817c, - 0xbf85000d, 0x8f768771, + 0xb97302dc, 0x8f739973, + 0x8873737f, 0xb97a2a05, + 0x807a817a, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbf850002, 0x8f7a897a, + 0xbf820001, 0x8f7a8a7a, + 0xb9761e06, 0x8f768a76, + 0x807a767a, 0x807aff7a, + 0x00000200, 0xbef603ff, + 0x01000000, 0xbefe037c, + 0xbefc037a, 0xf4611efa, + 0xf8000000, 0x807a847a, + 0xbefc037e, 0xbefe037c, + 0xbefc037a, 0xf4611b3a, + 0xf8000000, 0x807a847a, + 0xbefc037e, 0xbefe037c, + 0xbefc037a, 0xf4611b7a, + 0xf8000000, 0x807a847a, + 0xbefc037e, 0xbefe037c, + 0xbefc037a, 0xf4611bba, + 0xf8000000, 0x807a847a, + 0xbefc037e, 0xbefe037c, + 0xbefc037a, 0xf4611bfa, + 0xf8000000, 0x807a847a, + 0xbefc037e, 0xbefe037c, + 0xbefc037a, 0xf4611e3a, + 0xf8000000, 0x807a847a, + 0xbefc037e, 0xb971f803, + 0xbefe037c, 0xbefc037a, + 0xf4611c7a, 0xf8000000, + 0x807a847a, 0xbefc037e, + 0xbefe037c, 0xbefc037a, + 0xf4611cba, 0xf8000000, + 0x807a847a, 0xbefc037e, + 0xb97bf801, 0xbefe037c, + 0xbefc037a, 0xf4611efa, + 0xf8000000, 0x807a847a, + 0xbefc037e, 0xb97bf814, + 0xbefe037c, 0xbefc037a, + 0xf4611efa, 0xf8000000, + 0x807a847a, 0xbefc037e, + 0xb97bf815, 0xbefe037c, + 0xbefc037a, 0xf4611efa, + 0xf8000000, 0x807a847a, + 0xbefc037e, 0x8776ff7f, + 0x04000000, 0xbeef0380, + 0x886f6f76, 0xb97a2a05, + 0x807a817a, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbf850002, 0x8f7a897a, + 0xbf820001, 0x8f7a8a7a, + 0xb9761e06, 0x8f768a76, + 0x807a767a, 0xbef603ff, + 0x01000000, 0xbef20374, + 0x80747a74, 0x82758075, + 0xbefc0380, 0xbf800000, + 0xbe802f00, 0xbe822f02, + 0xbe842f04, 
0xbe862f06, + 0xbe882f08, 0xbe8a2f0a, + 0xbe8c2f0c, 0xbe8e2f0e, + 0xf469003a, 0xfa000000, + 0xf469013a, 0xfa000010, + 0xf469023a, 0xfa000020, + 0xf469033a, 0xfa000030, + 0x8074c074, 0x82758075, + 0x807c907c, 0xbf0aff7c, + 0x00000060, 0xbf85ffea, + 0xbe802f00, 0xbe822f02, + 0xbe842f04, 0xbe862f06, + 0xbe882f08, 0xbe8a2f0a, + 0xf469003a, 0xfa000000, + 0xf469013a, 0xfa000010, + 0xf469023a, 0xfa000020, + 0x8074b074, 0x82758075, + 0xbef40372, 0xbefa0380, + 0xbefe03c1, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbf850002, 0xbeff0380, + 0xbf820002, 0xbeff03c1, + 0xbf82000b, 0xbef603ff, + 0x01000000, 0xe0704000, + 0x7a5d0000, 0xe0704080, + 0x7a5d0100, 0xe0704100, + 0x7a5d0200, 0xe0704180, + 0x7a5d0300, 0xbf82000a, 0xbef603ff, 0x01000000, - 0xbefc0380, 0x7e008700, 0xe0704000, 0x7a5d0000, - 0x807c817c, 0x807aff7a, - 0x00000080, 0xbf0a717c, - 0xbf85fff8, 0xbf82001b, - 0x8f768871, 0xbef603ff, - 0x01000000, 0xbefc0380, - 0x7e008700, 0xe0704000, - 0x7a5d0000, 0x807c817c, - 0x807aff7a, 0x00000100, - 0xbf0a717c, 0xbf85fff8, - 0xb9711e06, 0x8771c171, - 0xbf84000c, 0x8f718371, - 0x80717c71, 0xbefe03c1, - 0xbeff0380, 0x7e008700, - 0xe0704000, 0x7a5d0000, - 0x807c817c, 0x807aff7a, - 0x00000080, 0xbf0a717c, - 0xbf85fff8, 0xbf8a0000, - 0x8776ff72, 0x04000000, - 0xbf84002b, 0xbefe03c1, - 0x877c8172, 0xbf06817c, + 0xe0704100, 0x7a5d0100, + 0xe0704200, 0x7a5d0200, + 0xe0704300, 0x7a5d0300, + 0xbefe03c1, 0x907c9973, + 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820001, 0xbeff03c1, 0xb9714306, 0x8771c171, - 0xbf840021, 0x8f718671, + 0xbf840046, 0xbf8a0000, + 0x8776ff6f, 0x04000000, + 0xbf840042, 0x8f718671, 0x8f718271, 0xbef60371, + 0xb97a2a05, 0x807a817a, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbf850002, + 0x8f7a897a, 0xbf820001, + 0x8f7a8a7a, 0xb9761e06, + 0x8f768a76, 0x807a767a, + 0x807aff7a, 0x00000200, + 0x807aff7a, 0x00000080, 0xbef603ff, 0x01000000, 0xd7650000, 0x000100c1, 0xd7660000, 0x000200c1, - 0x16000084, 0x877c8172, - 0xbf06817c, 0xbefc0380, - 0xbf85000a, 0x807cff7c, - 0x00000080, 0x807aff7a, - 0x00000080, 0xd5250000, - 0x0001ff00, 0x00000080, - 0xbf0a717c, 0xbf85fff7, - 0xbf820009, 0x807cff7c, - 0x00000100, 0x807aff7a, - 0x00000100, 0xd5250000, - 0x0001ff00, 0x00000100, - 0xbf0a717c, 0xbf85fff7, - 0x877c8172, 0xbf06817c, - 0xbf850003, 0x8f7687ff, - 0x0000006a, 0xbf820002, - 0x8f7688ff, 0x0000006a, + 0x16000084, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbefc0380, 0xbf850012, + 0xbe8303ff, 0x00000080, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf8c0000, + 0xe0704000, 0x7a5d0100, + 0x807c037c, 0x807a037a, + 0xd5250000, 0x0001ff00, + 0x00000080, 0xbf0a717c, + 0xbf85fff4, 0xbf820011, + 0xbe8303ff, 0x00000100, + 0xbf800000, 0xbf800000, + 0xbf800000, 0xd8d80000, + 0x01000000, 0xbf8c0000, + 0xe0704000, 0x7a5d0100, + 0x807c037c, 0x807a037a, + 0xd5250000, 0x0001ff00, + 0x00000100, 0xbf0a717c, + 0xbf85fff4, 0xbefe03c1, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbf850004, + 0xbefa03ff, 0x00000200, + 0xbeff0380, 0xbf820003, + 0xbefa03ff, 0x00000400, + 0xbeff03c1, 0xb9712a05, + 0x80718171, 0x8f718271, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbf850017, 0xbef603ff, 0x01000000, - 0x877c8172, 0xbf06817c, - 0xbefc0380, 0xbf800000, - 0xbf85000b, 0xbe802e00, - 0x7e000200, 0xe0704000, - 0x7a5d0000, 0x807aff7a, - 0x00000080, 0x807c817c, - 0xbf0aff7c, 0x0000006a, - 0xbf85fff6, 0xbf82000a, - 0xbe802e00, 0x7e000200, - 0xe0704000, 0x7a5d0000, - 0x807aff7a, 0x00000100, - 0x807c817c, 0xbf0aff7c, - 0x0000006a, 0xbf85fff6, - 0xbef60384, 0xbef603ff, - 0x01000000, 0x877c8172, - 0xbf06817c, 0xbf850030, 
- 0x7e00027b, 0xe0704000, - 0x7a5d0000, 0x807aff7a, - 0x00000080, 0x7e00026c, - 0xe0704000, 0x7a5d0000, - 0x807aff7a, 0x00000080, - 0x7e00026d, 0xe0704000, - 0x7a5d0000, 0x807aff7a, - 0x00000080, 0x7e00026e, - 0xe0704000, 0x7a5d0000, - 0x807aff7a, 0x00000080, - 0x7e00026f, 0xe0704000, - 0x7a5d0000, 0x807aff7a, - 0x00000080, 0x7e000270, - 0xe0704000, 0x7a5d0000, - 0x807aff7a, 0x00000080, - 0xb971f803, 0x7e000271, + 0xbefc0384, 0xbf0a717c, + 0xbf840037, 0x7e008700, + 0x7e028701, 0x7e048702, + 0x7e068703, 0xe0704000, + 0x7a5d0000, 0xe0704080, + 0x7a5d0100, 0xe0704100, + 0x7a5d0200, 0xe0704180, + 0x7a5d0300, 0x807c847c, + 0x807aff7a, 0x00000200, + 0xbf0a717c, 0xbf85ffef, + 0xbf820025, 0xbef603ff, + 0x01000000, 0xbefc0384, + 0xbf0a717c, 0xbf840020, + 0x7e008700, 0x7e028701, + 0x7e048702, 0x7e068703, 0xe0704000, 0x7a5d0000, + 0xe0704100, 0x7a5d0100, + 0xe0704200, 0x7a5d0200, + 0xe0704300, 0x7a5d0300, + 0x807c847c, 0x807aff7a, + 0x00000400, 0xbf0a717c, + 0xbf85ffef, 0xb9711e06, + 0x8771c171, 0xbf84000c, + 0x8f718371, 0x80717c71, + 0xbefe03c1, 0xbeff0380, + 0x7e008700, 0xe0704000, + 0x7a5d0000, 0x807c817c, 0x807aff7a, 0x00000080, - 0x7e000273, 0xe0704000, - 0x7a5d0000, 0x807aff7a, - 0x00000080, 0xb97bf801, - 0x7e00027b, 0xe0704000, - 0x7a5d0000, 0x807aff7a, - 0x00000080, 0xbf82002f, - 0x7e00027b, 0xe0704000, - 0x7a5d0000, 0x807aff7a, - 0x00000100, 0x7e00026c, - 0xe0704000, 0x7a5d0000, - 0x807aff7a, 0x00000100, - 0x7e00026d, 0xe0704000, - 0x7a5d0000, 0x807aff7a, - 0x00000100, 0x7e00026e, - 0xe0704000, 0x7a5d0000, - 0x807aff7a, 0x00000100, - 0x7e00026f, 0xe0704000, - 0x7a5d0000, 0x807aff7a, - 0x00000100, 0x7e000270, - 0xe0704000, 0x7a5d0000, - 0x807aff7a, 0x00000100, - 0xb971f803, 0x7e000271, - 0xe0704000, 0x7a5d0000, - 0x807aff7a, 0x00000100, - 0x7e000273, 0xe0704000, - 0x7a5d0000, 0x807aff7a, - 0x00000100, 0xb97bf801, - 0x7e00027b, 0xe0704000, - 0x7a5d0000, 0x807aff7a, - 0x00000100, 0xbf820119, - 0xbef4037e, 0x8775ff7f, - 0x0000ffff, 0x8875ff75, - 0x00040000, 0xbef60380, - 0xbef703ff, 0x00807fac, - 0x8772ff7f, 0x08000000, - 0x90728372, 0x88777277, - 0x8772ff7f, 0x70000000, - 0x90728172, 0x88777277, - 0xb97902dc, 0x8879797f, - 0xbef80380, 0xbefe03c1, - 0x877c8179, 0xbf06817c, + 0xbf0a717c, 0xbf85fff8, + 0xbf820141, 0xbef4037e, + 0x8775ff7f, 0x0000ffff, + 0x8875ff75, 0x00040000, + 0xbef60380, 0xbef703ff, + 0x10807fac, 0x8772ff7f, + 0x08000000, 0x90728372, + 0x88777277, 0x8772ff7f, + 0x70000000, 0x90728172, + 0x88777277, 0xb97302dc, + 0x8f739973, 0x8873737f, + 0x8772ff7f, 0x04000000, + 0xbf840036, 0xbefe03c1, + 0x907c9973, 0x877c817c, + 0xbf06817c, 0xbf850002, + 0xbeff0380, 0xbf820001, + 0xbeff03c1, 0xb96f4306, + 0x876fc16f, 0xbf84002b, + 0x8f6f866f, 0x8f6f826f, + 0xbef6036f, 0xb9782a05, + 0x80788178, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbf850002, 0x8f788978, + 0xbf820001, 0x8f788a78, + 0xb9721e06, 0x8f728a72, + 0x80787278, 0x8078ff78, + 0x00000200, 0x8078ff78, + 0x00000080, 0xbef603ff, + 0x01000000, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbefc0380, 0xbf850009, + 0xe0310000, 0x781d0000, + 0x807cff7c, 0x00000080, + 0x8078ff78, 0x00000080, + 0xbf0a6f7c, 0xbf85fff8, + 0xbf820008, 0xe0310000, + 0x781d0000, 0x807cff7c, + 0x00000100, 0x8078ff78, + 0x00000100, 0xbf0a6f7c, + 0xbf85fff8, 0xbef80380, + 0xbefe03c1, 0x907c9973, + 0x877c817c, 0xbf06817c, 0xbf850002, 0xbeff0380, 0xbf820001, 0xbeff03c1, 0xb96f2a05, 0x806f816f, - 0x8f6f826f, 0x877c8179, - 0xbf06817c, 0xbf850013, - 0x8f76876f, 0xbef603ff, + 0x8f6f826f, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbf850021, 0xbef603ff, 0x01000000, 0xbef20378, - 
0x8078ff78, 0x00000080, - 0xbefc0381, 0xe0304000, - 0x785d0000, 0xbf8c3f70, - 0x7e008500, 0x807c817c, - 0x8078ff78, 0x00000080, - 0xbf0a6f7c, 0xbf85fff7, - 0xe0304000, 0x725d0000, - 0xbf820023, 0x8f76886f, + 0x8078ff78, 0x00000200, + 0xbefc0384, 0xe0304000, + 0x785d0000, 0xe0304080, + 0x785d0100, 0xe0304100, + 0x785d0200, 0xe0304180, + 0x785d0300, 0xbf8c3f70, + 0x7e008500, 0x7e028501, + 0x7e048502, 0x7e068503, + 0x807c847c, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85ffee, 0xe0304000, + 0x725d0000, 0xe0304080, + 0x725d0100, 0xe0304100, + 0x725d0200, 0xe0304180, + 0x725d0300, 0xbf820031, 0xbef603ff, 0x01000000, 0xbef20378, 0x8078ff78, - 0x00000100, 0xbefc0381, + 0x00000400, 0xbefc0384, 0xe0304000, 0x785d0000, + 0xe0304100, 0x785d0100, + 0xe0304200, 0x785d0200, + 0xe0304300, 0x785d0300, 0xbf8c3f70, 0x7e008500, - 0x807c817c, 0x8078ff78, - 0x00000100, 0xbf0a6f7c, - 0xbf85fff7, 0xb96f1e06, - 0x876fc16f, 0xbf84000e, - 0x8f6f836f, 0x806f7c6f, - 0xbefe03c1, 0xbeff0380, - 0xe0304000, 0x785d0000, - 0xbf8c3f70, 0x7e008500, - 0x807c817c, 0x8078ff78, - 0x00000080, 0xbf0a6f7c, - 0xbf85fff7, 0xbeff03c1, - 0xe0304000, 0x725d0000, - 0x8772ff79, 0x04000000, - 0xbf840020, 0xbefe03c1, - 0x877c8179, 0xbf06817c, - 0xbf850002, 0xbeff0380, - 0xbf820001, 0xbeff03c1, - 0xb96f4306, 0x876fc16f, - 0xbf840016, 0x8f6f866f, - 0x8f6f826f, 0xbef6036f, - 0xbef603ff, 0x01000000, - 0x877c8172, 0xbf06817c, - 0xbefc0380, 0xbf850007, - 0x807cff7c, 0x00000080, - 0x8078ff78, 0x00000080, - 0xbf0a6f7c, 0xbf85fffa, - 0xbf820006, 0x807cff7c, - 0x00000100, 0x8078ff78, - 0x00000100, 0xbf0a6f7c, - 0xbf85fffa, 0x877c8179, - 0xbf06817c, 0xbf850003, - 0x8f7687ff, 0x0000006a, - 0xbf820002, 0x8f7688ff, - 0x0000006a, 0xbef603ff, - 0x01000000, 0x877c8179, - 0xbf06817c, 0xbf850012, - 0xf4211cba, 0xf0000000, + 0x7e028501, 0x7e048502, + 0x7e068503, 0x807c847c, + 0x8078ff78, 0x00000400, + 0xbf0a6f7c, 0xbf85ffee, + 0xb96f1e06, 0x876fc16f, + 0xbf84000e, 0x8f6f836f, + 0x806f7c6f, 0xbefe03c1, + 0xbeff0380, 0xe0304000, + 0x785d0000, 0xbf8c3f70, + 0x7e008500, 0x807c817c, 0x8078ff78, 0x00000080, - 0xbefc0381, 0xf421003a, - 0xf0000000, 0x8078ff78, - 0x00000080, 0xbf8cc07f, - 0xbe803000, 0xbf800000, - 0x807c817c, 0xbf0aff7c, - 0x0000006a, 0xbf85fff5, - 0xbe800372, 0xbf820011, - 0xf4211cba, 0xf0000000, - 0x8078ff78, 0x00000100, - 0xbefc0381, 0xf421003a, - 0xf0000000, 0x8078ff78, - 0x00000100, 0xbf8cc07f, - 0xbe803000, 0xbf800000, - 0x807c817c, 0xbf0aff7c, - 0x0000006a, 0xbf85fff5, - 0xbe800372, 0xbef60384, - 0xbef603ff, 0x01000000, - 0x877c8179, 0xbf06817c, - 0xbf850025, 0xf4211bfa, - 0xf0000000, 0x8078ff78, - 0x00000080, 0xf4211b3a, - 0xf0000000, 0x8078ff78, - 0x00000080, 0xf4211b7a, - 0xf0000000, 0x8078ff78, - 0x00000080, 0xf4211eba, - 0xf0000000, 0x8078ff78, - 0x00000080, 0xf4211efa, - 0xf0000000, 0x8078ff78, - 0x00000080, 0xf4211c3a, - 0xf0000000, 0x8078ff78, - 0x00000080, 0xf4211c7a, - 0xf0000000, 0x8078ff78, - 0x00000080, 0xf4211cfa, - 0xf0000000, 0x8078ff78, - 0x00000080, 0xf4211e7a, - 0xf0000000, 0x8078ff78, - 0x00000080, 0xbf820024, - 0xf4211bfa, 0xf0000000, - 0x8078ff78, 0x00000100, + 0xbf0a6f7c, 0xbf85fff7, + 0xbeff03c1, 0xe0304000, + 0x725d0000, 0xe0304100, + 0x725d0100, 0xe0304200, + 0x725d0200, 0xe0304300, + 0x725d0300, 0xb9782a05, + 0x80788178, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbf850002, 0x8f788978, + 0xbf820001, 0x8f788a78, + 0xb9721e06, 0x8f728a72, + 0x80787278, 0x8078ff78, + 0x00000200, 0x80f8ff78, + 0x00000050, 0xbef603ff, + 0x01000000, 0xbefc03ff, + 0x0000006c, 0x80f89078, + 0xf429003a, 0xf0000000, + 0xbf8cc07f, 0x80fc847c, + 
0xbf800000, 0xbe803100, + 0xbe823102, 0x80f8a078, + 0xf42d003a, 0xf0000000, + 0xbf8cc07f, 0x80fc887c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0xbe843104, + 0xbe863106, 0x80f8c078, + 0xf431003a, 0xf0000000, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe803100, + 0xbe823102, 0xbe843104, + 0xbe863106, 0xbe883108, + 0xbe8a310a, 0xbe8c310c, + 0xbe8e310e, 0xbf06807c, + 0xbf84fff0, 0xb9782a05, + 0x80788178, 0x907c9973, + 0x877c817c, 0xbf06817c, + 0xbf850002, 0x8f788978, + 0xbf820001, 0x8f788a78, + 0xb9721e06, 0x8f728a72, + 0x80787278, 0x8078ff78, + 0x00000200, 0xbef603ff, + 0x01000000, 0xf4211bfa, + 0xf0000000, 0x80788478, 0xf4211b3a, 0xf0000000, - 0x8078ff78, 0x00000100, - 0xf4211b7a, 0xf0000000, - 0x8078ff78, 0x00000100, + 0x80788478, 0xf4211b7a, + 0xf0000000, 0x80788478, 0xf4211eba, 0xf0000000, - 0x8078ff78, 0x00000100, - 0xf4211efa, 0xf0000000, - 0x8078ff78, 0x00000100, + 0x80788478, 0xf4211efa, + 0xf0000000, 0x80788478, 0xf4211c3a, 0xf0000000, - 0x8078ff78, 0x00000100, - 0xf4211c7a, 0xf0000000, - 0x8078ff78, 0x00000100, - 0xf4211cfa, 0xf0000000, - 0x8078ff78, 0x00000100, + 0x80788478, 0xf4211c7a, + 0xf0000000, 0x80788478, 0xf4211e7a, 0xf0000000, - 0x8078ff78, 0x00000100, - 0xbf8cc07f, 0x876dff6d, + 0x80788478, 0xf4211cfa, + 0xf0000000, 0x80788478, + 0xf4211bba, 0xf0000000, + 0x80788478, 0xbf8cc07f, + 0xb9eef814, 0xf4211bba, + 0xf0000000, 0x80788478, + 0xbf8cc07f, 0xb9eef815, + 0xbef2036d, 0x876dff72, 0x0000ffff, 0xbefc036f, 0xbefe037a, 0xbeff037b, 0x876f71ff, 0x000003ff, - 0xb9ef4803, 0xb9f3f816, + 0xb9ef4803, 0xb9f9f816, 0x876f71ff, 0xfffff800, 0x906f8b6f, 0xb9efa2c3, - 0xb9f9f801, 0x876fff6d, - 0xf0000000, 0x906f9c6f, - 0x8f6f906f, 0xbef20380, - 0x88726f72, 0x876fff6d, - 0x08000000, 0x906f9b6f, - 0x8f6f8f6f, 0x88726f72, - 0x876fff70, 0x00800000, - 0x906f976f, 0xb9f2f807, - 0xb9f0f802, 0xbf8a0000, - 0xbe80226c, 0xbf810000, + 0xb9f3f801, 0x876fff72, + 0xfc000000, 0x906f9a6f, + 0x8f6f906f, 0xbef30380, + 0x88736f73, 0x876fff72, + 0x02000000, 0x906f996f, + 0x8f6f8f6f, 0x88736f73, + 0x876fff72, 0x01000000, + 0x906f986f, 0x8f6f996f, + 0x88736f73, 0x876fff70, + 0x00800000, 0x906f976f, + 0xb9f3f807, 0x87fe7e7e, + 0x87ea6a6a, 0xb9f0f802, + 0xbf8a0000, 0xbe80226c, + 0xbf810000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, 0xbf9f0000, - 0xbf9f0000, 0x00000000, +}; +static const uint32_t cwsr_trap_arcturus_hex[] = { + 0xbf820001, 0xbf8202c4, + 0xb8f8f802, 0x89788678, + 0xb8eef801, 0x866eff6e, + 0x00000800, 0xbf840003, + 0x866eff78, 0x00002000, + 0xbf840016, 0xb8fbf803, + 0x866eff7b, 0x00000400, + 0xbf85003b, 0x866eff7b, + 0x00000800, 0xbf850003, + 0x866eff7b, 0x00000100, + 0xbf84000c, 0x866eff78, + 0x00002000, 0xbf840005, + 0xbf8e0010, 0xb8eef803, + 0x866eff6e, 0x00000400, + 0xbf84fffb, 0x8778ff78, + 0x00002000, 0x80ec886c, + 0x82ed806d, 0xb8eef807, + 0x866fff6e, 0x001f8000, + 0x8e6f8b6f, 0x8977ff77, + 0xfc000000, 0x87776f77, + 0x896eff6e, 0x001f8000, + 0xb96ef807, 0xb8faf812, + 0xb8fbf813, 0x8efa887a, + 0xc0071bbd, 0x00000000, + 0xbf8cc07f, 0xc0071ebd, + 0x00000008, 0xbf8cc07f, + 0x86ee6e6e, 0xbf840001, + 0xbe801d6e, 0xb8fbf803, + 0x867bff7b, 0x000001ff, + 0xbf850002, 0x806c846c, + 0x826d806d, 0x866dff6d, + 0x0000ffff, 0x8f6e8b77, + 0x866eff6e, 0x001f8000, + 0xb96ef807, 0x86fe7e7e, + 0x86ea6a6a, 0x8f6e8378, + 0xb96ee0c2, 0xbf800002, + 0xb9780002, 0xbe801f6c, + 0x866dff6d, 0x0000ffff, + 0xbefa0080, 0xb97a0283, + 0xb8fa2407, 0x8e7a9b7a, + 0x876d7a6d, 0xb8fa03c7, + 0x8e7a9a7a, 0x876d7a6d, + 0xb8faf807, 0x867aff7a, + 0x00007fff, 0xb97af807, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbf900004, + 
0x877a8478, 0xb97af802, + 0xbf8e0002, 0xbf88fffe, + 0xb8fa2a05, 0x807a817a, + 0x8e7a8a7a, 0x8e7a817a, + 0xb8fb1605, 0x807b817b, + 0x8e7b867b, 0x807a7b7a, + 0x807a7e7a, 0x827b807f, + 0x867bff7b, 0x0000ffff, + 0xc04b1c3d, 0x00000050, + 0xbf8cc07f, 0xc04b1d3d, + 0x00000060, 0xbf8cc07f, + 0xc0431e7d, 0x00000074, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x867aff7f, + 0x08000000, 0x8f7a837a, + 0x87777a77, 0x867aff7f, + 0x70000000, 0x8f7a817a, + 0x87777a77, 0xbef1007c, + 0xbef00080, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0x8e708170, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0xbef60084, + 0xbef600ff, 0x01000000, + 0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611b3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611b7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611bba, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611bfa, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611e3a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8fbf803, 0xbefe007c, + 0xbefc0070, 0xc0611efa, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611a3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xbefe007c, + 0xbefc0070, 0xc0611a7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0xb8f1f801, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, + 0x0000007c, 0xbf8cc07f, + 0x80708470, 0xbefc007e, + 0x867aff7f, 0x04000000, + 0xbeef0080, 0x876f6f7a, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, + 0x867aff78, 0x00400000, + 0xbf850003, 0xb8faf803, + 0x897a7aff, 0x10000000, + 0xbf85004d, 0xbe840080, + 0xd2890000, 0x00000900, + 0x80048104, 0xd2890001, + 0x00000900, 0x80048104, + 0xd2890002, 0x00000900, + 0x80048104, 0xd2890003, + 0x00000900, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000901, 0x80048104, + 0xd2890001, 0x00000901, + 0x80048104, 0xd2890002, + 0x00000901, 0x80048104, + 0xd2890003, 0x00000901, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000902, + 0x80048104, 0xd2890001, + 0x00000902, 0x80048104, + 0xd2890002, 0x00000902, + 0x80048104, 0xd2890003, + 0x00000902, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000903, 0x80048104, + 0xd2890001, 0x00000903, + 0x80048104, 0xd2890002, + 0x00000903, 0x80048104, + 0xd2890003, 0x00000903, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbf820008, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, + 0xbefe00c1, 0xbeff00c1, + 0xb8fb4306, 
0x867bc17b, + 0xbf840064, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840060, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0x8e708170, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0x8070ff70, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850030, + 0x24040682, 0xd86e4000, + 0x00000002, 0xbf8cc07f, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x680404ff, 0x00000200, + 0xd0c9006a, 0x0000f702, + 0xbf87ffd2, 0xbf820015, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2a05, + 0x807b817b, 0x8e7b827b, + 0x8e76887b, 0xbef600ff, + 0x01000000, 0xbefc0084, + 0xbf0a7b7c, 0xbf84006d, + 0xbf11017c, 0x807bff7b, + 0x00001000, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850051, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x807c847c, 0xbf0a7b7c, + 0xbf85ffb1, 0xbf9c0000, + 0xbf820012, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffef, + 0xbf9c0000, 0xbefc0080, + 0xbf11017c, 0x867aff78, + 0x00400000, 0xbf850003, + 0xb8faf803, 0x897a7aff, + 0x10000000, 0xbf850059, + 0xd3d84000, 0x18000100, + 0xd3d84001, 0x18000101, + 0xd3d84002, 0x18000102, + 0xd3d84003, 0x18000103, + 0xbe840080, 0xd2890000, + 0x00000900, 0x80048104, + 0xd2890001, 0x00000900, + 0x80048104, 0xd2890002, + 0x00000900, 0x80048104, + 0xd2890003, 0x00000900, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000901, + 0x80048104, 0xd2890001, + 0x00000901, 0x80048104, + 0xd2890002, 0x00000901, + 0x80048104, 0xd2890003, + 0x00000901, 0x80048104, + 
0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0xbe840080, 0xd2890000, + 0x00000902, 0x80048104, + 0xd2890001, 0x00000902, + 0x80048104, 0xd2890002, + 0x00000902, 0x80048104, + 0xd2890003, 0x00000902, + 0x80048104, 0xc069003a, + 0x00000070, 0xbf8cc07f, + 0x80709070, 0xbf06c004, + 0xbf84ffee, 0xbe840080, + 0xd2890000, 0x00000903, + 0x80048104, 0xd2890001, + 0x00000903, 0x80048104, + 0xd2890002, 0x00000903, + 0x80048104, 0xd2890003, + 0x00000903, 0x80048104, + 0xc069003a, 0x00000070, + 0xbf8cc07f, 0x80709070, + 0xbf06c004, 0xbf84ffee, + 0x807c847c, 0xbf0a7b7c, + 0xbf85ffa9, 0xbf9c0000, + 0xbf820016, 0xd3d84000, + 0x18000100, 0xd3d84001, + 0x18000101, 0xd3d84002, + 0x18000102, 0xd3d84003, + 0x18000103, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffeb, + 0xbf9c0000, 0xbf820106, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x08000000, + 0x8f6e836e, 0x87776e77, + 0x866eff7f, 0x70000000, + 0x8f6e816e, 0x87776e77, + 0x866eff7f, 0x04000000, + 0xbf84001f, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf84001a, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x8078ff78, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x781d0000, + 0xe0510100, 0x781d0000, + 0x807cff7c, 0x00000200, + 0x8078ff78, 0x00000200, + 0xbf0a6f7c, 0xbf85fff6, + 0xbef80080, 0xbefe00c1, + 0xbeff00c1, 0xb8ef2a05, + 0x806f816f, 0x8e6f826f, + 0x8e76886f, 0xbef90076, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbef30079, + 0x8079ff79, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0x806fff6f, 0x00008000, + 0xe0524000, 0x791d0000, + 0xe0524100, 0x791d0100, + 0xe0524200, 0x791d0200, + 0xe0524300, 0x791d0300, + 0x8079ff79, 0x00000400, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffdb, 0xbf9c0000, + 0xe0524000, 0x731d0000, + 0xe0524100, 0x731d0100, + 0xe0524200, 0x731d0200, + 0xe0524300, 0x731d0300, + 0xbf8c0f70, 0xd3d94000, + 0x18000100, 0xd3d94001, + 0x18000101, 0xd3d94002, + 0x18000102, 0xd3d94003, + 0x18000103, 0xe0524000, + 0x6e1d0000, 0xe0524100, + 0x6e1d0100, 0xe0524200, + 0x6e1d0200, 0xe0524300, + 0x6e1d0300, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x80f8c078, + 0xb8ef1605, 0x806f816f, + 0x8e6f846f, 0x8e76826f, + 0xbef600ff, 0x01000000, + 0xbefc006f, 0xc031003a, + 0x00000078, 0x80f8c078, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff0, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0x8e788178, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0xbef60084, + 0xbef600ff, 0x01000000, + 0xc0211bfa, 0x00000078, + 0x80788478, 0xc0211b3a, + 0x00000078, 0x80788478, + 0xc0211b7a, 0x00000078, + 0x80788478, 0xc0211c3a, + 0x00000078, 0x80788478, + 0xc0211c7a, 0x00000078, + 0x80788478, 0xc0211eba, + 0x00000078, 0x80788478, + 0xc0211efa, 0x00000078, + 0x80788478, 0xc0211a3a, + 0x00000078, 0x80788478, + 0xc0211a7a, 
0x00000078, + 0x80788478, 0xc0211cfa, + 0x00000078, 0x80788478, + 0xbf8cc07f, 0xbefc006f, + 0xbefe0070, 0xbeff0071, + 0x866f7bff, 0x000003ff, + 0xb96f4803, 0x866f7bff, + 0xfffff800, 0x8f6f8b6f, + 0xb96fa2c3, 0xb973f801, + 0xb8ee2a05, 0x806e816e, + 0x8e6e8a6e, 0x8e6e816e, + 0xb8ef1605, 0x806f816f, + 0x8e6f866f, 0x806e6f6e, + 0x806e746e, 0x826f8075, + 0x866fff6f, 0x0000ffff, + 0xc00b1c37, 0x00000050, + 0xc00b1d37, 0x00000060, + 0xc0031e77, 0x00000074, + 0xbf8cc07f, 0x866fff6d, + 0xf8000000, 0x8f6f9b6f, + 0x8e6f906f, 0xbeee0080, + 0x876e6f6e, 0x866fff6d, + 0x04000000, 0x8f6f9a6f, + 0x8e6f8f6f, 0x876e6f6e, + 0x866fff7a, 0x00800000, + 0x8f6f976f, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0x95806f6c, + 0xbf810000, 0x00000000, }; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_crat.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_crat.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 2019-08-31 15:01:11.853736168 -0500 @@ -662,6 +662,7 @@ case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: + case CHIP_ARCTURUS: pcache_info = vega10_cache_info; num_of_cache_types = ARRAY_SIZE(vega10_cache_info); break; @@ -788,7 +789,7 @@ * is put in the code to ensure we don't overwrite. */ #define VCRAT_SIZE_FOR_CPU (2 * PAGE_SIZE) -#define VCRAT_SIZE_FOR_GPU (3 * PAGE_SIZE) +#define VCRAT_SIZE_FOR_GPU (4 * PAGE_SIZE) /* kfd_fill_cu_for_cpu - Fill in Compute info for the given CPU NUMA node * diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_device.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_device.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_device.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_device.c 2019-08-31 15:01:11.853736168 -0500 @@ -42,6 +42,7 @@ #ifdef KFD_SUPPORT_IOMMU_V2 static const struct kfd_device_info kaveri_device_info = { .asic_family = CHIP_KAVERI, + .asic_name = "kaveri", .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, @@ -60,6 +61,7 @@ static const struct kfd_device_info carrizo_device_info = { .asic_family = CHIP_CARRIZO, + .asic_name = "carrizo", .max_pasid_bits = 16, /* max num of queues for CZ.TODO should be a dynamic value */ .max_no_of_hqd = 24, @@ -78,6 +80,7 @@ static const struct kfd_device_info raven_device_info = { .asic_family = CHIP_RAVEN, + .asic_name = "raven", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -96,6 +99,7 @@ static const struct kfd_device_info hawaii_device_info = { .asic_family = CHIP_HAWAII, + .asic_name = "hawaii", .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, @@ -114,6 +118,7 @@ static const struct kfd_device_info tonga_device_info = { .asic_family = CHIP_TONGA, + .asic_name = "tonga", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -131,6 +136,7 @@ static const struct kfd_device_info fiji_device_info = { .asic_family = CHIP_FIJI, + .asic_name = "fiji", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -148,6 +154,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .asic_family = CHIP_FIJI, + .asic_name = "fiji", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -166,6 +173,7 @@ static const struct kfd_device_info polaris10_device_info = { .asic_family = CHIP_POLARIS10, + .asic_name = 
"polaris10", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -183,6 +191,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { .asic_family = CHIP_POLARIS10, + .asic_name = "polaris10", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -200,6 +209,7 @@ static const struct kfd_device_info polaris11_device_info = { .asic_family = CHIP_POLARIS11, + .asic_name = "polaris11", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -217,6 +227,7 @@ static const struct kfd_device_info polaris12_device_info = { .asic_family = CHIP_POLARIS12, + .asic_name = "polaris12", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -234,6 +245,7 @@ static const struct kfd_device_info vegam_device_info = { .asic_family = CHIP_VEGAM, + .asic_name = "vegam", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 4, @@ -251,6 +263,7 @@ static const struct kfd_device_info vega10_device_info = { .asic_family = CHIP_VEGA10, + .asic_name = "vega10", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -268,6 +281,7 @@ static const struct kfd_device_info vega10_vf_device_info = { .asic_family = CHIP_VEGA10, + .asic_name = "vega10", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -285,6 +299,7 @@ static const struct kfd_device_info vega12_device_info = { .asic_family = CHIP_VEGA12, + .asic_name = "vega12", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -302,6 +317,7 @@ static const struct kfd_device_info vega20_device_info = { .asic_family = CHIP_VEGA20, + .asic_name = "vega20", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -317,8 +333,27 @@ .num_sdma_queues_per_engine = 8, }; +static const struct kfd_device_info arcturus_device_info = { + .asic_family = CHIP_ARCTURUS, + .asic_name = "arcturus", + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 6, + .num_sdma_queues_per_engine = 8, +}; + static const struct kfd_device_info navi10_device_info = { .asic_family = CHIP_NAVI10, + .asic_name = "navi10", .max_pasid_bits = 16, .max_no_of_hqd = 24, .doorbell_size = 8, @@ -452,7 +487,10 @@ { 0x66a4, &vega20_device_info }, /* Vega20 */ { 0x66a7, &vega20_device_info }, /* Vega20 */ { 0x66af, &vega20_device_info }, /* Vega20 */ - /* Navi10 */ + { 0x738C, &arcturus_device_info }, /* Arcturus */ + { 0x7388, &arcturus_device_info }, /* Arcturus */ + { 0x738E, &arcturus_device_info }, /* Arcturus */ + { 0x7390, &arcturus_device_info }, /* Arcturus vf */ { 0x7310, &navi10_device_info }, /* Navi10 */ { 0x7312, &navi10_device_info }, /* Navi10 */ { 0x7318, &navi10_device_info }, /* Navi10 */ @@ -536,6 +574,10 @@ BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_gfx8_hex; kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); + } else if (kfd->device_info->asic_family == CHIP_ARCTURUS) { + BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_arcturus_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); } else if (kfd->device_info->asic_family < CHIP_NAVI10) { BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); kfd->cwsr_isa = cwsr_trap_gfx9_hex; diff -Naur 
linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 2019-08-31 15:01:11.853736168 -0500 @@ -880,8 +880,8 @@ } dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; - dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; - dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; + dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); + dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); return 0; } @@ -1019,8 +1019,8 @@ dqm->sdma_queue_count = 0; dqm->xgmi_sdma_queue_count = 0; dqm->active_runlist = false; - dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; - dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; + dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); + dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); @@ -1786,6 +1786,7 @@ case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: device_queue_manager_init_v9(&dqm->asic_ops); break; case CHIP_NAVI10: @@ -1813,7 +1814,8 @@ return NULL; } -void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd) +static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, + struct kfd_mem_obj *mqd) { WARN(!mqd, "No hiq sdma mqd trunk to free"); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c 2019-08-31 15:01:11.853736168 -0500 @@ -405,6 +405,7 @@ case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: case CHIP_NAVI10: kfd_init_apertures_v9(pdd, id); break; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c 2019-08-31 15:01:11.853736168 -0500 @@ -80,6 +80,7 @@ source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || source_id == SOC15_INTSRC_CP_BAD_OPCODE || client_id == SOC15_IH_CLIENTID_VMC || + client_id == SOC15_IH_CLIENTID_VMC1 || client_id == SOC15_IH_CLIENTID_UTCL2; } @@ -104,6 +105,7 @@ else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) kfd_signal_hw_exception_event(pasid); else if (client_id == SOC15_IH_CLIENTID_VMC || + client_id == SOC15_IH_CLIENTID_VMC1 || client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 2019-08-31 15:01:11.853736168 -0500 @@ -330,6 +330,7 @@ case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: kernel_queue_init_v9(&kq->ops_asic_specific); break; case CHIP_NAVI10: diff -Naur 
linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c 2019-08-31 15:01:11.853736168 -0500 @@ -81,7 +81,8 @@ packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; packet->bitfields2.process_quantum = 1; packet->bitfields2.pasid = qpd->pqm->process->pasid; - packet->bitfields14.gds_size = qpd->gds_size; + packet->bitfields14.gds_size = qpd->gds_size & 0x3F; + packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF; packet->bitfields14.num_gws = qpd->num_gws; packet->bitfields14.num_oac = qpd->num_oac; packet->bitfields14.sdma_enable = 1; @@ -143,6 +144,34 @@ return 0; } +static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res) +{ + struct pm4_mes_set_resources *packet; + + packet = (struct pm4_mes_set_resources *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_set_resources)); + + packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES, + sizeof(struct pm4_mes_set_resources)); + + packet->bitfields2.queue_type = + queue_type__mes_set_resources__hsa_interface_queue_hiq; + packet->bitfields2.vmid_mask = res->vmid_mask; + packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; + packet->bitfields7.oac_mask = res->oac_mask; + packet->bitfields8.gds_heap_base = res->gds_heap_base; + packet->bitfields8.gds_heap_size = res->gds_heap_size; + + packet->gws_mask_lo = lower_32_bits(res->gws_mask); + packet->gws_mask_hi = upper_32_bits(res->gws_mask); + + packet->queue_mask_lo = lower_32_bits(res->queue_mask); + packet->queue_mask_hi = upper_32_bits(res->queue_mask); + + return 0; +} + static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, struct queue *q, bool is_static) { @@ -161,6 +190,8 @@ packet->bitfields2.engine_sel = engine_sel__mes_map_queues__compute_vi; packet->bitfields2.gws_control_queue = q->gws ? 
1 : 0; + packet->bitfields2.extended_engine_sel = + extended_engine_sel__mes_map_queues__legacy_engine_sel; packet->bitfields2.queue_type = queue_type__mes_map_queues__normal_compute_vi; @@ -176,9 +207,15 @@ break; case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA_XGMI: - packet->bitfields2.engine_sel = q->properties.sdma_engine_id + - engine_sel__mes_map_queues__sdma0_vi; use_static = false; /* no static queues under SDMA */ + if (q->properties.sdma_engine_id < 2) + packet->bitfields2.engine_sel = q->properties.sdma_engine_id + + engine_sel__mes_map_queues__sdma0_vi; + else { + packet->bitfields2.extended_engine_sel = + extended_engine_sel__mes_map_queues__sdma0_to_7_sel; + packet->bitfields2.engine_sel = q->properties.sdma_engine_id; + } break; default: WARN(1, "queue type %d", q->properties.type); @@ -218,13 +255,23 @@ switch (type) { case KFD_QUEUE_TYPE_COMPUTE: case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.extended_engine_sel = + extended_engine_sel__mes_unmap_queues__legacy_engine_sel; packet->bitfields2.engine_sel = engine_sel__mes_unmap_queues__compute; break; case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA_XGMI: - packet->bitfields2.engine_sel = - engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + if (sdma_engine < 2) { + packet->bitfields2.extended_engine_sel = + extended_engine_sel__mes_unmap_queues__legacy_engine_sel; + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + } else { + packet->bitfields2.extended_engine_sel = + extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel; + packet->bitfields2.engine_sel = sdma_engine; + } break; default: WARN(1, "queue type %d", type); @@ -326,7 +373,7 @@ const struct packet_manager_funcs kfd_v9_pm_funcs = { .map_process = pm_map_process_v9, .runlist = pm_runlist_v9, - .set_resources = pm_set_resources_vi, + .set_resources = pm_set_resources_v9, .map_queues = pm_map_queues_v9, .unmap_queues = pm_unmap_queues_v9, .query_status = pm_query_status_v9, diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c 2019-08-31 15:01:11.853736168 -0500 @@ -98,8 +98,8 @@ uint32_t *se_mask) { struct kfd_cu_info cu_info; - uint32_t cu_per_sh[4] = {0}; - int i, se, cu = 0; + uint32_t cu_per_se[KFD_MAX_NUM_SE] = {0}; + int i, se, sh, cu = 0; amdgpu_amdkfd_get_cu_info(mm->dev->kgd, &cu_info); @@ -107,8 +107,8 @@ cu_mask_count = cu_info.cu_active_number; for (se = 0; se < cu_info.num_shader_engines; se++) - for (i = 0; i < 4; i++) - cu_per_sh[se] += hweight32(cu_info.cu_bitmap[se][i]); + for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) + cu_per_se[se] += hweight32(cu_info.cu_bitmap[se % 4][sh + (se / 4)]); /* Symmetrically map cu_mask to all SEs: * cu_mask[0] bit0 -> se_mask[0] bit0; @@ -128,6 +128,6 @@ se = 0; cu++; } - } while (cu >= cu_per_sh[se] && cu < 32); + } while (cu >= cu_per_se[se] && cu < 32); } } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h 2019-08-31 15:01:11.853736168 -0500 @@ -26,6 +26,8 @@ #include "kfd_priv.h" +#define KFD_MAX_NUM_SE 8 + /** * struct mqd_manager * diff -Naur 
linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 2019-08-31 15:01:11.854736168 -0500 @@ -46,7 +46,7 @@ struct queue_properties *q) { struct v9_mqd *m; - uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */ + uint32_t se_mask[KFD_MAX_NUM_SE] = {0}; if (q->cu_mask_count == 0) return; @@ -59,12 +59,20 @@ m->compute_static_thread_mgmt_se1 = se_mask[1]; m->compute_static_thread_mgmt_se2 = se_mask[2]; m->compute_static_thread_mgmt_se3 = se_mask[3]; + m->compute_static_thread_mgmt_se4 = se_mask[4]; + m->compute_static_thread_mgmt_se5 = se_mask[5]; + m->compute_static_thread_mgmt_se6 = se_mask[6]; + m->compute_static_thread_mgmt_se7 = se_mask[7]; - pr_debug("update cu mask to %#x %#x %#x %#x\n", + pr_debug("update cu mask to %#x %#x %#x %#x %#x %#x %#x %#x\n", m->compute_static_thread_mgmt_se0, m->compute_static_thread_mgmt_se1, m->compute_static_thread_mgmt_se2, - m->compute_static_thread_mgmt_se3); + m->compute_static_thread_mgmt_se3, + m->compute_static_thread_mgmt_se4, + m->compute_static_thread_mgmt_se5, + m->compute_static_thread_mgmt_se6, + m->compute_static_thread_mgmt_se7); } static void set_priority(struct v9_mqd *m, struct queue_properties *q) @@ -125,6 +133,10 @@ m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF; m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c 2019-08-31 15:01:11.854736168 -0500 @@ -239,6 +239,7 @@ case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: pm->pmf = &kfd_v9_pm_funcs; break; case CHIP_NAVI10: diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h 2019-08-31 15:01:11.854736168 -0500 @@ -83,10 +83,10 @@ union { struct { - uint32_t gds_heap_base:6; - uint32_t reserved3:5; - uint32_t gds_heap_size:6; - uint32_t reserved4:15; + uint32_t gds_heap_base:10; + uint32_t reserved3:1; + uint32_t gds_heap_size:10; + uint32_t reserved4:11; } bitfields8; uint32_t ordinal8; }; @@ -179,7 +179,7 @@ uint32_t num_gws:7; uint32_t sdma_enable:1; uint32_t num_oac:4; - uint32_t reserved8:4; + uint32_t gds_size_hi:4; uint32_t gds_size:6; uint32_t num_queues:10; } bitfields14; @@ -260,6 +260,10 @@ engine_sel__mes_map_queues__sdma1_vi = 3 }; +enum mes_map_queues_extended_engine_sel_enum { + extended_engine_sel__mes_map_queues__legacy_engine_sel = 0, + extended_engine_sel__mes_map_queues__sdma0_to_7_sel = 1 +}; struct pm4_mes_map_queues { union { @@ -269,7 +273,8 @@ union 
{ struct { - uint32_t reserved1:4; + uint32_t reserved1:2; + enum mes_map_queues_extended_engine_sel_enum extended_engine_sel:2; enum mes_map_queues_queue_sel_enum queue_sel:2; uint32_t reserved5:6; uint32_t gws_control_queue:1; @@ -382,6 +387,11 @@ engine_sel__mes_unmap_queues__sdmal = 3 }; +enum mes_unmap_queues_extended_engine_sel_enum { + extended_engine_sel__mes_unmap_queues__legacy_engine_sel = 0, + extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel = 1 +}; + struct pm4_mes_unmap_queues { union { union PM4_MES_TYPE_3_HEADER header; /* header */ @@ -391,7 +401,7 @@ union { struct { enum mes_unmap_queues_action_enum action:2; - uint32_t reserved1:2; + enum mes_unmap_queues_extended_engine_sel_enum extended_engine_sel:2; enum mes_unmap_queues_queue_sel_enum queue_sel:2; uint32_t reserved2:20; enum mes_unmap_queues_engine_sel_enum engine_sel:3; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_priv.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_priv.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 2019-08-31 15:01:11.854736168 -0500 @@ -195,6 +195,7 @@ struct kfd_device_info { enum amd_asic_type asic_family; + const char *asic_name; const struct kfd_event_interrupt_class *event_interrupt_class; unsigned int max_pasid_bits; unsigned int max_no_of_hqd; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_process.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_process.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_process.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_process.c 2019-08-31 15:01:11.854736168 -0500 @@ -801,6 +801,8 @@ return ret; } + amdgpu_vm_set_task_info(pdd->vm); + ret = kfd_process_device_reserve_ib_mem(pdd); if (ret) goto err_reserve_ib_mem; @@ -1042,7 +1044,6 @@ { struct delayed_work *dwork; struct kfd_process *p; - struct kfd_process_device *pdd; int ret = 0; dwork = to_delayed_work(work); @@ -1051,16 +1052,6 @@ * lifetime of this thread, kfd_process p will be valid */ p = container_of(dwork, struct kfd_process, restore_work); - - /* Call restore_process_bos on the first KGD device. This function - * takes care of restoring the whole process including other devices. - * Restore can fail if enough memory is not available. If so, - * reschedule again. - */ - pdd = list_first_entry(&p->per_device_data, - struct kfd_process_device, - per_device_list); - pr_debug("Started restoring pasid %d\n", p->pasid); /* Setting last_restore_timestamp before successful restoration. 
diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_topology.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_topology.c --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 2019-08-31 15:01:11.854736168 -0500 @@ -406,8 +406,6 @@ char *buffer) { struct kfd_topology_device *dev; - char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; - uint32_t i; uint32_t log_max_watch_addr; /* Making sure that the buffer is an empty string */ @@ -422,14 +420,8 @@ if (strcmp(attr->name, "name") == 0) { dev = container_of(attr, struct kfd_topology_device, attr_name); - for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) { - public_name[i] = - (char)dev->node_props.marketing_name[i]; - if (dev->node_props.marketing_name[i] == 0) - break; - } - public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0; - return sysfs_show_str_val(buffer, public_name); + + return sysfs_show_str_val(buffer, dev->node_props.name); } dev = container_of(attr, struct kfd_topology_device, @@ -1274,6 +1266,10 @@ */ amdgpu_amdkfd_get_cu_info(dev->gpu->kgd, &cu_info); + + strncpy(dev->node_props.name, gpu->device_info->asic_name, + KFD_TOPOLOGY_PUBLIC_NAME_SIZE); + dev->node_props.simd_arrays_per_engine = cu_info.num_shader_arrays_per_engine; @@ -1321,6 +1317,7 @@ case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_RAVEN: + case CHIP_ARCTURUS: case CHIP_NAVI10: dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_topology.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_topology.h --- linux-5.3-rc6/drivers/gpu/drm/amd/amdkfd/kfd_topology.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/amdkfd/kfd_topology.h 2019-08-31 15:01:11.854736168 -0500 @@ -27,7 +27,7 @@ #include #include "kfd_crat.h" -#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128 +#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 32 #define HSA_CAP_HOT_PLUGGABLE 0x00000001 #define HSA_CAP_ATS_PRESENT 0x00000002 @@ -81,7 +81,7 @@ int32_t drm_render_minor; uint32_t num_sdma_engines; uint32_t num_sdma_xgmi_engines; - uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; + char name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; }; #define HSA_MEM_HEAP_TYPE_SYSTEM 0 diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 2019-08-31 15:01:12.261736204 -0500 @@ -688,12 +688,15 @@ */ if (adev->flags & AMD_IS_APU && adev->asic_type >= CHIP_CARRIZO && - adev->asic_type < CHIP_RAVEN) + adev->asic_type <= CHIP_RAVEN) init_data.flags.gpu_vm_support = true; if (amdgpu_dc_feature_mask & DC_FBC_MASK) init_data.flags.fbc_support = true; + if (amdgpu_dc_feature_mask & DC_MULTI_MON_PP_MCLK_SWITCH_MASK) + init_data.flags.multi_mon_pp_mclk_switch = true; + init_data.flags.power_down_display_on_boot = true; #ifdef CONFIG_DRM_AMD_DC_DCN2_0 @@ -809,6 +812,9 @@ case CHIP_VEGA12: case CHIP_VEGA20: case CHIP_NAVI10: + case CHIP_NAVI14: + case CHIP_NAVI12: + case CHIP_RENOIR: return 0; case CHIP_RAVEN: if (ASICREV_IS_PICASSO(adev->external_rev_id)) @@ -2358,7 +2364,12 @@ #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case CHIP_RAVEN: #if defined(CONFIG_DRM_AMD_DC_DCN2_0) + case CHIP_NAVI12: case CHIP_NAVI10: + case CHIP_NAVI14: 
+#endif +#if defined(CONFIG_DRM_AMD_DC_DCN2_1) + case CHIP_RENOIR: #endif if (dcn10_register_irq_handlers(dm->adev)) { DRM_ERROR("DM: Failed to initialize IRQ\n"); @@ -2428,8 +2439,7 @@ { int ret; int s3_state; - struct pci_dev *pdev = to_pci_dev(device); - struct drm_device *drm_dev = pci_get_drvdata(pdev); + struct drm_device *drm_dev = dev_get_drvdata(device); struct amdgpu_device *adev = drm_dev->dev_private; ret = kstrtoint(buf, 0, &s3_state); @@ -2515,10 +2525,23 @@ #endif #if defined(CONFIG_DRM_AMD_DC_DCN2_0) case CHIP_NAVI10: + case CHIP_NAVI12: adev->mode_info.num_crtc = 6; adev->mode_info.num_hpd = 6; adev->mode_info.num_dig = 6; break; + case CHIP_NAVI14: + adev->mode_info.num_crtc = 5; + adev->mode_info.num_hpd = 5; + adev->mode_info.num_dig = 5; + break; +#endif +#if defined(CONFIG_DRM_AMD_DC_DCN2_1) + case CHIP_RENOIR: + adev->mode_info.num_crtc = 4; + adev->mode_info.num_hpd = 4; + adev->mode_info.num_dig = 4; + break; #endif default: DRM_ERROR("Unsupported ASIC type: 0x%X\n", adev->asic_type); @@ -2665,7 +2688,7 @@ const struct amdgpu_framebuffer *afb, const enum surface_pixel_format format, const enum dc_rotation_angle rotation, - const union plane_size *plane_size, + const struct plane_size *plane_size, const union dc_tiling_info *tiling_info, const uint64_t info, struct dc_plane_dcc_param *dcc, @@ -2691,8 +2714,8 @@ return -EINVAL; input.format = format; - input.surface_size.width = plane_size->grph.surface_size.width; - input.surface_size.height = plane_size->grph.surface_size.height; + input.surface_size.width = plane_size->surface_size.width; + input.surface_size.height = plane_size->surface_size.height; input.swizzle_mode = tiling_info->gfx9.swizzle; if (rotation == ROTATION_ANGLE_0 || rotation == ROTATION_ANGLE_180) @@ -2710,9 +2733,9 @@ return -EINVAL; dcc->enable = 1; - dcc->grph.meta_pitch = + dcc->meta_pitch = AMDGPU_TILING_GET(info, DCC_PITCH_MAX) + 1; - dcc->grph.independent_64b_blks = i64b; + dcc->independent_64b_blks = i64b; dcc_address = get_dcc_address(afb->address, info); address->grph.meta_addr.low_part = lower_32_bits(dcc_address); @@ -2728,7 +2751,7 @@ const enum dc_rotation_angle rotation, const uint64_t tiling_flags, union dc_tiling_info *tiling_info, - union plane_size *plane_size, + struct plane_size *plane_size, struct dc_plane_dcc_param *dcc, struct dc_plane_address *address) { @@ -2741,11 +2764,11 @@ memset(address, 0, sizeof(*address)); if (format < SURFACE_PIXEL_FORMAT_VIDEO_BEGIN) { - plane_size->grph.surface_size.x = 0; - plane_size->grph.surface_size.y = 0; - plane_size->grph.surface_size.width = fb->width; - plane_size->grph.surface_size.height = fb->height; - plane_size->grph.surface_pitch = + plane_size->surface_size.x = 0; + plane_size->surface_size.y = 0; + plane_size->surface_size.width = fb->width; + plane_size->surface_size.height = fb->height; + plane_size->surface_pitch = fb->pitches[0] / fb->format->cpp[0]; address->type = PLN_ADDR_TYPE_GRAPHICS; @@ -2754,20 +2777,20 @@ } else if (format < SURFACE_PIXEL_FORMAT_INVALID) { uint64_t chroma_addr = afb->address + fb->offsets[1]; - plane_size->video.luma_size.x = 0; - plane_size->video.luma_size.y = 0; - plane_size->video.luma_size.width = fb->width; - plane_size->video.luma_size.height = fb->height; - plane_size->video.luma_pitch = + plane_size->surface_size.x = 0; + plane_size->surface_size.y = 0; + plane_size->surface_size.width = fb->width; + plane_size->surface_size.height = fb->height; + plane_size->surface_pitch = fb->pitches[0] / fb->format->cpp[0]; - 
plane_size->video.chroma_size.x = 0; - plane_size->video.chroma_size.y = 0; + plane_size->chroma_size.x = 0; + plane_size->chroma_size.y = 0; /* TODO: set these based on surface format */ - plane_size->video.chroma_size.width = fb->width / 2; - plane_size->video.chroma_size.height = fb->height / 2; + plane_size->chroma_size.width = fb->width / 2; + plane_size->chroma_size.height = fb->height / 2; - plane_size->video.chroma_pitch = + plane_size->chroma_pitch = fb->pitches[1] / fb->format->cpp[1]; address->type = PLN_ADDR_TYPE_VIDEO_PROGRESSIVE; @@ -2814,6 +2837,11 @@ adev->asic_type == CHIP_VEGA20 || #if defined(CONFIG_DRM_AMD_DC_DCN2_0) adev->asic_type == CHIP_NAVI10 || + adev->asic_type == CHIP_NAVI14 || + adev->asic_type == CHIP_NAVI12 || +#endif +#if defined(CONFIG_DRM_AMD_DC_DCN2_1) + adev->asic_type == CHIP_RENOIR || #endif adev->asic_type == CHIP_RAVEN) { /* Fill GFX9 params */ @@ -2995,6 +3023,8 @@ plane_info->visible = true; plane_info->stereo_format = PLANE_STEREO_FORMAT_NONE; + plane_info->layer_index = 0; + ret = fill_plane_color_attributes(plane_state, plane_info->format, &plane_info->color_space); if (ret) @@ -3060,6 +3090,7 @@ dc_plane_state->global_alpha = plane_info.global_alpha; dc_plane_state->global_alpha_value = plane_info.global_alpha_value; dc_plane_state->dcc = plane_info.dcc; + dc_plane_state->layer_index = plane_info.layer_index; // Always returns 0 /* * Always set input transfer function, since plane state is refreshed @@ -3503,6 +3534,10 @@ bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false; int mode_refresh; int preferred_refresh = 0; +#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT + struct dsc_dec_dpcd_caps dsc_caps; + uint32_t link_bandwidth_kbps; +#endif struct dc_sink *sink = NULL; if (aconnector == NULL) { @@ -3575,17 +3610,23 @@ &mode, &aconnector->base, con_state, old_stream); #ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT - /* stream->timing.flags.DSC = 0; */ - /* */ - /* if (aconnector->dc_link && */ - /* aconnector->dc_link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT #<{(|&& */ - /* aconnector->dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.is_dsc_supported|)}>#) */ - /* if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc, */ - /* &aconnector->dc_link->dpcd_caps.dsc_caps, */ - /* dc_link_bandwidth_kbps(aconnector->dc_link, dc_link_get_link_cap(aconnector->dc_link)), */ - /* &stream->timing, */ - /* &stream->timing.dsc_cfg)) */ - /* stream->timing.flags.DSC = 1; */ + stream->timing.flags.DSC = 0; + + if (aconnector->dc_link && sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT) { + dc_dsc_parse_dsc_dpcd(aconnector->dc_link->dpcd_caps.dsc_caps.dsc_basic_caps.raw, + aconnector->dc_link->dpcd_caps.dsc_caps.dsc_ext_caps.raw, + &dsc_caps); + link_bandwidth_kbps = dc_link_bandwidth_kbps(aconnector->dc_link, + dc_link_get_link_cap(aconnector->dc_link)); + + if (dsc_caps.is_dsc_supported) + if (dc_dsc_compute_config(aconnector->dc_link->ctx->dc, + &dsc_caps, + link_bandwidth_kbps, + &stream->timing, + &stream->timing.dsc_cfg)) + stream->timing.flags.DSC = 1; + } #endif update_stream_scaling_settings(&mode, dm_state, stream); @@ -3669,7 +3710,7 @@ state->abm_level = cur->abm_level; state->vrr_supported = cur->vrr_supported; state->freesync_config = cur->freesync_config; - state->crc_enabled = cur->crc_enabled; + state->crc_src = cur->crc_src; state->cm_has_degamma = cur->cm_has_degamma; state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb; @@ -3739,6 +3780,7 @@ .atomic_destroy_state = dm_crtc_destroy_state, .set_crc_source = amdgpu_dm_crtc_set_crc_source, .verify_crc_source 
= amdgpu_dm_crtc_verify_crc_source, + .get_crc_sources = amdgpu_dm_crtc_get_crc_sources, .enable_vblank = dm_enable_vblank, .disable_vblank = dm_disable_vblank, }; @@ -4458,7 +4500,7 @@ } if (plane->type != DRM_PLANE_TYPE_CURSOR) - domain = amdgpu_display_supported_domains(adev); + domain = amdgpu_display_supported_domains(adev, rbo->flags); else domain = AMDGPU_GEM_DOMAIN_VRAM; @@ -4548,20 +4590,10 @@ static int dm_plane_atomic_async_check(struct drm_plane *plane, struct drm_plane_state *new_plane_state) { - struct drm_plane_state *old_plane_state = - drm_atomic_get_old_plane_state(new_plane_state->state, plane); - /* Only support async updates on cursor planes. */ if (plane->type != DRM_PLANE_TYPE_CURSOR) return -EINVAL; - /* - * DRM calls prepare_fb and cleanup_fb on new_plane_state for - * async commits so don't allow fb changes. - */ - if (old_plane_state->fb != new_plane_state->fb) - return -EINVAL; - return 0; } @@ -5705,11 +5737,11 @@ * deadlock during GPU reset when this fence will not signal * but we hold reservation lock for the BO. */ - r = reservation_object_wait_timeout_rcu(abo->tbo.resv, true, + r = reservation_object_wait_timeout_rcu(abo->tbo.base.resv, true, false, msecs_to_jiffies(5000)); if (unlikely(r <= 0)) - DRM_ERROR("Waiting for fences timed out or interrupted!"); + DRM_ERROR("Waiting for fences timed out!"); /* * TODO This might fail and hence better not used, wait @@ -5733,8 +5765,14 @@ bundle->surface_updates[planes_count].plane_info = &bundle->plane_infos[planes_count]; + /* + * Only allow immediate flips for fast updates that don't + * change FB pitch, DCC state, rotation or mirroring. + */ bundle->flip_addrs[planes_count].flip_immediate = - (crtc->state->pageflip_flags & DRM_MODE_PAGE_FLIP_ASYNC) != 0; + (crtc->state->pageflip_flags & + DRM_MODE_PAGE_FLIP_ASYNC) != 0 && + acrtc_state->update_type == UPDATE_TYPE_FAST; timestamp_ns = ktime_get_ns(); bundle->flip_addrs[planes_count].flip_timestamp_in_us = div_u64(timestamp_ns, 1000); @@ -5979,6 +6017,7 @@ struct drm_crtc *crtc; struct drm_crtc_state *old_crtc_state, *new_crtc_state; int i; + enum amdgpu_dm_pipe_crc_source source; for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { @@ -6004,9 +6043,11 @@ #ifdef CONFIG_DEBUG_FS /* The stream has changed so CRC capture needs to be re-enabled. */ - if (dm_new_crtc_state->crc_enabled) { - dm_new_crtc_state->crc_enabled = false; - amdgpu_dm_crtc_set_crc_source(crtc, "auto"); + source = dm_new_crtc_state->crc_src; + if (amdgpu_dm_is_valid_crc_source(source)) { + amdgpu_dm_crtc_configure_crc_source( + crtc, dm_new_crtc_state, + dm_new_crtc_state->crc_src); } #endif } @@ -6057,23 +6098,8 @@ if (dm_old_crtc_state->interrupts_enabled && (!dm_new_crtc_state->interrupts_enabled || - drm_atomic_crtc_needs_modeset(new_crtc_state))) { - /* - * Drop the extra vblank reference added by CRC - * capture if applicable. - */ - if (dm_new_crtc_state->crc_enabled) - drm_crtc_vblank_put(crtc); - - /* - * Only keep CRC capture enabled if there's - * still a stream for the CRTC.
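/*
 * [Editor's note, not part of the patch: the flip_immediate hunk above
 * tightens the old condition. An immediate flip now requires both the
 * userspace DRM_MODE_PAGE_FLIP_ASYNC request and a commit classified as
 * UPDATE_TYPE_FAST, so updates that touch pitch, DCC state, rotation or
 * mirroring always take the vsynced path. The gate in isolation, with
 * illustrative stand-in names:]
 */
#include <stdbool.h>

#define SKETCH_PAGE_FLIP_ASYNC 0x1	/* stand-in for DRM_MODE_PAGE_FLIP_ASYNC */

enum sketch_update_type {
	SKETCH_UPDATE_FAST,
	SKETCH_UPDATE_MED,
	SKETCH_UPDATE_FULL,
};

static bool sketch_flip_immediate(unsigned int pageflip_flags,
				  enum sketch_update_type type)
{
	return (pageflip_flags & SKETCH_PAGE_FLIP_ASYNC) != 0 &&
	       type == SKETCH_UPDATE_FAST;
}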
- */ - if (!dm_new_crtc_state->stream) - dm_new_crtc_state->crc_enabled = false; - + drm_atomic_crtc_needs_modeset(new_crtc_state))) manage_dm_interrupts(adev, acrtc, false); - } } /* * Add check here for SoC's that support hardware cursor plane, to @@ -7045,6 +7071,12 @@ continue; for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, j) { + const struct amdgpu_framebuffer *amdgpu_fb = + to_amdgpu_framebuffer(new_plane_state->fb); + struct dc_plane_info plane_info; + struct dc_flip_addrs flip_addr; + uint64_t tiling_flags; + new_plane_crtc = new_plane_state->crtc; old_plane_crtc = old_plane_state->crtc; new_dm_plane_state = to_dm_plane_state(new_plane_state); @@ -7088,6 +7120,24 @@ updates[num_plane].scaling_info = &scaling_info; + if (amdgpu_fb) { + ret = get_fb_info(amdgpu_fb, &tiling_flags); + if (ret) + goto cleanup; + + memset(&flip_addr, 0, sizeof(flip_addr)); + + ret = fill_dc_plane_info_and_addr( + dm->adev, new_plane_state, tiling_flags, + &plane_info, + &flip_addr.address); + if (ret) + goto cleanup; + + updates[num_plane].plane_info = &plane_info; + updates[num_plane].flip_addr = &flip_addr; + } + num_plane++; } @@ -7284,6 +7334,26 @@ if (ret) goto fail; + if (state->legacy_cursor_update) { + /* + * This is a fast cursor update coming from the plane update + * helper, check if it can be done asynchronously for better + * performance. + */ + state->async_update = + !drm_atomic_helper_async_check(dev, state); + + /* + * Skip the remaining global validation if this is an async + * update. Cursor updates can be done without affecting + * state or bandwidth calcs and this avoids the performance + * penalty of locking the private state object and + * allocating a new dc_state. + */ + if (state->async_update) + return 0; + } + /* Check scaling and underscan changes*/ /* TODO Removed scaling changes validation due to inability to commit * new stream into context w\o causing full reset. Need to @@ -7336,13 +7406,37 @@ ret = -EINVAL; goto fail; } - } else if (state->legacy_cursor_update) { + } else { /* - * This is a fast cursor update coming from the plane update - * helper, check if it can be done asynchronously for better - * performance. + * The commit is a fast update. Fast updates shouldn't change + * the DC context, affect global validation, and can have their + * commit work done in parallel with other commits not touching + * the same resource. If we have a new DC context as part of + * the DM atomic state from validation we need to free it and + * retain the existing one instead. */ - state->async_update = !drm_atomic_helper_async_check(dev, state); + struct dm_atomic_state *new_dm_state, *old_dm_state; + + new_dm_state = dm_atomic_get_new_state(state); + old_dm_state = dm_atomic_get_old_state(state); + + if (new_dm_state && old_dm_state) { + if (new_dm_state->context) + dc_release_state(new_dm_state->context); + + new_dm_state->context = old_dm_state->context; + + if (old_dm_state->context) + dc_retain_state(old_dm_state->context); + } + } + + /* Store the overall update type for use later in atomic check. 
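/*
 * [Editor's note, not part of the patch: the fast-update branch above swaps
 * the freshly validated DC context for the one already in use: release the
 * new copy, point at the old one, and take a reference so both DM states own
 * it. The same refcount pattern, reduced to a toy context type; sketch_* is
 * a stand-in for dc_retain_state()/dc_release_state().]
 */
struct sketch_ctx {
	int refcount;
};

static void sketch_retain(struct sketch_ctx *c)  { c->refcount++; }
static void sketch_release(struct sketch_ctx *c) { c->refcount--; /* freed at 0 */ }

static void sketch_keep_existing(struct sketch_ctx **new_ctx,
				 struct sketch_ctx *old_ctx)
{
	if (*new_ctx)
		sketch_release(*new_ctx);	/* drop the validated copy */

	*new_ctx = old_ctx;			/* reuse the live context */

	if (old_ctx)
		sketch_retain(old_ctx);		/* now referenced by both states */
}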
*/ + for_each_new_crtc_in_state (state, crtc, new_crtc_state, i) { + struct dm_crtc_state *dm_new_crtc_state = + to_dm_crtc_state(new_crtc_state); + + dm_new_crtc_state->update_type = (int)overall_update_type; } /* Must be success */ diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c 2019-08-31 15:01:11.854736168 -0500 @@ -30,23 +30,57 @@ #include "amdgpu_dm.h" #include "dc.h" -enum amdgpu_dm_pipe_crc_source { - AMDGPU_DM_PIPE_CRC_SOURCE_NONE = 0, - AMDGPU_DM_PIPE_CRC_SOURCE_AUTO, - AMDGPU_DM_PIPE_CRC_SOURCE_MAX, - AMDGPU_DM_PIPE_CRC_SOURCE_INVALID = -1, +static const char *const pipe_crc_sources[] = { + "none", + "crtc", + "crtc dither", + "dprx", + "dprx dither", + "auto", }; static enum amdgpu_dm_pipe_crc_source dm_parse_crc_source(const char *source) { if (!source || !strcmp(source, "none")) return AMDGPU_DM_PIPE_CRC_SOURCE_NONE; - if (!strcmp(source, "auto")) - return AMDGPU_DM_PIPE_CRC_SOURCE_AUTO; + if (!strcmp(source, "auto") || !strcmp(source, "crtc")) + return AMDGPU_DM_PIPE_CRC_SOURCE_CRTC; + if (!strcmp(source, "dprx")) + return AMDGPU_DM_PIPE_CRC_SOURCE_DPRX; + if (!strcmp(source, "crtc dither")) + return AMDGPU_DM_PIPE_CRC_SOURCE_CRTC_DITHER; + if (!strcmp(source, "dprx dither")) + return AMDGPU_DM_PIPE_CRC_SOURCE_DPRX_DITHER; return AMDGPU_DM_PIPE_CRC_SOURCE_INVALID; } +static bool dm_is_crc_source_crtc(enum amdgpu_dm_pipe_crc_source src) +{ + return (src == AMDGPU_DM_PIPE_CRC_SOURCE_CRTC) || + (src == AMDGPU_DM_PIPE_CRC_SOURCE_CRTC_DITHER); +} + +static bool dm_is_crc_source_dprx(enum amdgpu_dm_pipe_crc_source src) +{ + return (src == AMDGPU_DM_PIPE_CRC_SOURCE_DPRX) || + (src == AMDGPU_DM_PIPE_CRC_SOURCE_DPRX_DITHER); +} + +static bool dm_need_crc_dither(enum amdgpu_dm_pipe_crc_source src) +{ + return (src == AMDGPU_DM_PIPE_CRC_SOURCE_CRTC_DITHER) || + (src == AMDGPU_DM_PIPE_CRC_SOURCE_DPRX_DITHER) || + (src == AMDGPU_DM_PIPE_CRC_SOURCE_NONE); +} + +const char *const *amdgpu_dm_crtc_get_crc_sources(struct drm_crtc *crtc, + size_t *count) +{ + *count = ARRAY_SIZE(pipe_crc_sources); + return pipe_crc_sources; +} + int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name, size_t *values_cnt) @@ -63,14 +97,52 @@ return 0; } -int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) +int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, + struct dm_crtc_state *dm_crtc_state, + enum amdgpu_dm_pipe_crc_source source) { struct amdgpu_device *adev = crtc->dev->dev_private; - struct dm_crtc_state *crtc_state = to_dm_crtc_state(crtc->state); - struct dc_stream_state *stream_state = crtc_state->stream; - bool enable; + struct dc_stream_state *stream_state = dm_crtc_state->stream; + bool enable = amdgpu_dm_is_valid_crc_source(source); + int ret = 0; + + /* Configuration will be deferred to stream enable. */ + if (!stream_state) + return 0; + + mutex_lock(&adev->dm.dc_lock); + + /* Enable CRTC CRC generation if necessary. 
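/*
 * [Editor's demo, not part of the patch: the string-to-source mapping above
 * can be exercised in plain userspace C. The enum values mirror
 * amdgpu_dm_pipe_crc_source: "auto" aliases "crtc", and unknown strings map
 * to the invalid sentinel (-1).]
 */
#include <stdio.h>
#include <string.h>

enum sketch_src {
	SKETCH_SRC_NONE = 0,
	SKETCH_SRC_CRTC,
	SKETCH_SRC_CRTC_DITHER,
	SKETCH_SRC_DPRX,
	SKETCH_SRC_DPRX_DITHER,
	SKETCH_SRC_INVALID = -1,
};

static enum sketch_src sketch_parse(const char *s)
{
	if (!s || !strcmp(s, "none"))
		return SKETCH_SRC_NONE;
	if (!strcmp(s, "auto") || !strcmp(s, "crtc"))
		return SKETCH_SRC_CRTC;
	if (!strcmp(s, "crtc dither"))
		return SKETCH_SRC_CRTC_DITHER;
	if (!strcmp(s, "dprx"))
		return SKETCH_SRC_DPRX;
	if (!strcmp(s, "dprx dither"))
		return SKETCH_SRC_DPRX_DITHER;
	return SKETCH_SRC_INVALID;
}

int main(void)
{
	/* prints "1 4 -1" */
	printf("%d %d %d\n", sketch_parse("auto"),
	       sketch_parse("dprx dither"), sketch_parse("bogus"));
	return 0;
}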
*/ + if (dm_is_crc_source_crtc(source)) { + if (!dc_stream_configure_crc(stream_state->ctx->dc, + stream_state, enable, enable)) { + ret = -EINVAL; + goto unlock; + } + } + + /* Configure dithering */ + if (!dm_need_crc_dither(source)) + dc_stream_set_dither_option(stream_state, DITHER_OPTION_TRUN8); + else + dc_stream_set_dither_option(stream_state, + DITHER_OPTION_DEFAULT); +unlock: + mutex_unlock(&adev->dm.dc_lock); + + return ret; +} + +int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) +{ enum amdgpu_dm_pipe_crc_source source = dm_parse_crc_source(src_name); + struct drm_crtc_commit *commit; + struct dm_crtc_state *crtc_state; + struct drm_dp_aux *aux = NULL; + bool enable = false; + bool enabled = false; + int ret = 0; if (source < 0) { DRM_DEBUG_DRIVER("Unknown CRC source %s for CRTC%d\n", @@ -78,41 +150,124 @@ return -EINVAL; } - if (!stream_state) { - DRM_ERROR("No stream state for CRTC%d\n", crtc->index); - return -EINVAL; + ret = drm_modeset_lock(&crtc->mutex, NULL); + if (ret) + return ret; + + spin_lock(&crtc->commit_lock); + commit = list_first_entry_or_null(&crtc->commit_list, + struct drm_crtc_commit, commit_entry); + if (commit) + drm_crtc_commit_get(commit); + spin_unlock(&crtc->commit_lock); + + if (commit) { + /* + * Need to wait for all outstanding programming to complete + * in commit tail since it can modify CRC related fields and + * hardware state. Since we're holding the CRTC lock we're + * guaranteed that no other commit work can be queued off + * before we modify the state below. + */ + ret = wait_for_completion_interruptible_timeout( + &commit->hw_done, 10 * HZ); + if (ret) + goto cleanup; } - enable = (source == AMDGPU_DM_PIPE_CRC_SOURCE_AUTO); + enable = amdgpu_dm_is_valid_crc_source(source); + crtc_state = to_dm_crtc_state(crtc->state); - mutex_lock(&adev->dm.dc_lock); - if (!dc_stream_configure_crc(stream_state->ctx->dc, stream_state, - enable, enable)) { - mutex_unlock(&adev->dm.dc_lock); - return -EINVAL; + /* + * USER REQ SRC | CURRENT SRC | BEHAVIOR + * ----------------------------- + * None | None | Do nothing + * None | CRTC | Disable CRTC CRC, set default to dither + * None | DPRX | Disable DPRX CRC, need 'aux', set default to dither + * None | CRTC DITHER | Disable CRTC CRC + * None | DPRX DITHER | Disable DPRX CRC, need 'aux' + * CRTC | XXXX | Enable CRTC CRC, no dither + * DPRX | XXXX | Enable DPRX CRC, need 'aux', no dither + * CRTC DITHER | XXXX | Enable CRTC CRC, set dither + * DPRX DITHER | XXXX | Enable DPRX CRC, need 'aux', set dither + */ + if (dm_is_crc_source_dprx(source) || + (source == AMDGPU_DM_PIPE_CRC_SOURCE_NONE && + dm_is_crc_source_dprx(crtc_state->crc_src))) { + struct amdgpu_dm_connector *aconn = NULL; + struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; + + drm_connector_list_iter_begin(crtc->dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { + if (!connector->state || connector->state->crtc != crtc) + continue; + + aconn = to_amdgpu_dm_connector(connector); + break; + } + drm_connector_list_iter_end(&conn_iter); + + if (!aconn) { + DRM_DEBUG_DRIVER("No amd connector matching CRTC-%d\n", crtc->index); + ret = -EINVAL; + goto cleanup; + } + + aux = &aconn->dm_dp_aux.aux; + + if (!aux) { + DRM_DEBUG_DRIVER("No dp aux for amd connector\n"); + ret = -EINVAL; + goto cleanup; + } } - /* When enabling CRC, we should also disable dithering. */ - dc_stream_set_dither_option(stream_state, - enable ? 
DITHER_OPTION_TRUN8 - : DITHER_OPTION_DEFAULT); - - mutex_unlock(&adev->dm.dc_lock); + if (amdgpu_dm_crtc_configure_crc_source(crtc, crtc_state, source)) { + ret = -EINVAL; + goto cleanup; + } /* * Reading the CRC requires the vblank interrupt handler to be * enabled. Keep a reference until CRC capture stops. */ - if (!crtc_state->crc_enabled && enable) - drm_crtc_vblank_get(crtc); - else if (crtc_state->crc_enabled && !enable) + enabled = amdgpu_dm_is_valid_crc_source(crtc_state->crc_src); + if (!enabled && enable) { + ret = drm_crtc_vblank_get(crtc); + if (ret) + goto cleanup; + + if (dm_is_crc_source_dprx(source)) { + if (drm_dp_start_crc(aux, crtc)) { + DRM_DEBUG_DRIVER("dp start crc failed\n"); + ret = -EINVAL; + goto cleanup; + } + } + } else if (enabled && !enable) { drm_crtc_vblank_put(crtc); + if (dm_is_crc_source_dprx(source)) { + if (drm_dp_stop_crc(aux)) { + DRM_DEBUG_DRIVER("dp stop crc failed\n"); + ret = -EINVAL; + goto cleanup; + } + } + } - crtc_state->crc_enabled = enable; + crtc_state->crc_src = source; /* Reset crc_skipped on dm state */ crtc_state->crc_skip_count = 0; - return 0; + +cleanup: + if (commit) + drm_crtc_commit_put(commit); + + drm_modeset_unlock(&crtc->mutex); + + return ret; } /** @@ -135,7 +290,7 @@ stream_state = crtc_state->stream; /* Early return if CRC capture is not enabled. */ - if (!crtc_state->crc_enabled) + if (!amdgpu_dm_is_valid_crc_source(crtc_state->crc_src)) return; /* @@ -149,10 +304,12 @@ return; } - if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, - &crcs[0], &crcs[1], &crcs[2])) - return; + if (dm_is_crc_source_crtc(crtc_state->crc_src)) { + if (!dc_stream_get_crc(stream_state->ctx->dc, stream_state, + &crcs[0], &crcs[1], &crcs[2])) + return; - drm_crtc_add_crc_entry(crtc, true, - drm_crtc_accurate_vblank_count(crtc), crcs); + drm_crtc_add_crc_entry(crtc, true, + drm_crtc_accurate_vblank_count(crtc), crcs); + } } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h --- linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h 2019-08-31 15:01:11.854736168 -0500 @@ -0,0 +1,67 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
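/*
 * [Editor's note, not part of the patch: the enabled/enable logic above pairs
 * exactly one drm_crtc_vblank_get() with one drm_crtc_vblank_put() across the
 * capture lifetime, so re-writing the same source is a no-op for the vblank
 * reference count. Reduced to the two transitions that matter, with the
 * get/put callbacks as stand-ins:]
 */
#include <stdbool.h>

static int sketch_update_vblank_ref(bool was_enabled, bool enable,
				    int (*vblank_get)(void),
				    void (*vblank_put)(void))
{
	if (!was_enabled && enable)
		return vblank_get();	/* off -> on: hold the vblank irq */

	if (was_enabled && !enable)
		vblank_put();		/* on -> off: release it */

	return 0;			/* same state: nothing to do */
}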
+ * + * Authors: AMD + * + */ + +#ifndef AMD_DAL_DEV_AMDGPU_DM_AMDGPU_DM_CRC_H_ +#define AMD_DAL_DEV_AMDGPU_DM_AMDGPU_DM_CRC_H_ + +struct drm_crtc; +struct dm_crtc_state; + +enum amdgpu_dm_pipe_crc_source { + AMDGPU_DM_PIPE_CRC_SOURCE_NONE = 0, + AMDGPU_DM_PIPE_CRC_SOURCE_CRTC, + AMDGPU_DM_PIPE_CRC_SOURCE_CRTC_DITHER, + AMDGPU_DM_PIPE_CRC_SOURCE_DPRX, + AMDGPU_DM_PIPE_CRC_SOURCE_DPRX_DITHER, + AMDGPU_DM_PIPE_CRC_SOURCE_MAX, + AMDGPU_DM_PIPE_CRC_SOURCE_INVALID = -1, +}; + +static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source source) +{ + return (source > AMDGPU_DM_PIPE_CRC_SOURCE_NONE) && + (source < AMDGPU_DM_PIPE_CRC_SOURCE_MAX); +} + +/* amdgpu_dm_crc.c */ +#ifdef CONFIG_DEBUG_FS +int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, + struct dm_crtc_state *dm_crtc_state, + enum amdgpu_dm_pipe_crc_source source); +int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name); +int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, + const char *src_name, + size_t *values_cnt); +const char *const *amdgpu_dm_crtc_get_crc_sources(struct drm_crtc *crtc, + size_t *count); +void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc); +#else +#define amdgpu_dm_crtc_set_crc_source NULL +#define amdgpu_dm_crtc_verify_crc_source NULL +#define amdgpu_dm_crtc_get_crc_sources NULL +#define amdgpu_dm_crtc_handle_crc_irq(x) +#endif + +#endif /* AMD_DAL_DEV_AMDGPU_DM_AMDGPU_DM_CRC_H_ */ diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c 2019-08-31 15:01:11.855736168 -0500 @@ -1053,9 +1053,33 @@ return 0; } +static int mst_topo(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct drm_connector *connector; + struct drm_connector_list_iter conn_iter; + struct amdgpu_dm_connector *aconnector; + + drm_connector_list_iter_begin(dev, &conn_iter); + drm_for_each_connector_iter(connector, &conn_iter) { + if (connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) + continue; + + aconnector = to_amdgpu_dm_connector(connector); + + seq_printf(m, "\nMST topology for connector %d\n", aconnector->connector_id); + drm_dp_mst_dump_topology(m, &aconnector->mst_mgr); + } + drm_connector_list_iter_end(&conn_iter); + + return 0; +} + static const struct drm_info_list amdgpu_dm_debugfs_list[] = { {"amdgpu_current_backlight_pwm", &current_backlight_read}, {"amdgpu_target_backlight_pwm", &target_backlight_read}, + {"amdgpu_mst_topology", &mst_topo}, }; /* diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h --- linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 2019-08-31 15:01:11.854736168 -0500 @@ -50,6 +50,7 @@ #include "irq_types.h" #include "signal_types.h" +#include "amdgpu_dm_crc.h" /* Forward declarations */ struct amdgpu_device; @@ -309,11 +310,12 @@ bool cm_has_degamma; bool cm_is_degamma_srgb; + int update_type; int active_planes; bool interrupts_enabled; int crc_skip_count; - bool crc_enabled; + enum amdgpu_dm_pipe_crc_source crc_src; bool
freesync_timing_changed; bool freesync_vrr_info_changed; @@ -380,19 +382,6 @@ void amdgpu_dm_update_freesync_caps(struct drm_connector *connector, struct edid *edid); -/* amdgpu_dm_crc.c */ -#ifdef CONFIG_DEBUG_FS -int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name); -int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, - const char *src_name, - size_t *values_cnt); -void amdgpu_dm_crtc_handle_crc_irq(struct drm_crtc *crtc); -#else -#define amdgpu_dm_crtc_set_crc_source NULL -#define amdgpu_dm_crtc_verify_crc_source NULL -#define amdgpu_dm_crtc_handle_crc_irq(x) -#endif - #define MAX_COLOR_LUT_ENTRIES 4096 /* Legacy gamma LUT users such as X don't like large LUT sizes */ #define MAX_COLOR_LEGACY_LUT_ENTRIES 256 diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c 2019-08-31 15:01:11.855736168 -0500 @@ -548,7 +548,9 @@ bool enable ) { - return false; + uint8_t enable_dsc = enable ? 1 : 0; + + return dm_helpers_dp_write_dpcd(ctx, stream->sink->link, DP_DSC_ENABLE, &enable_dsc, 1); } #endif diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c 2019-08-31 15:01:11.855736168 -0500 @@ -156,6 +156,26 @@ kfree(amdgpu_dm_connector); } +static int +amdgpu_dm_mst_connector_late_register(struct drm_connector *connector) +{ + struct amdgpu_dm_connector *amdgpu_dm_connector = + to_amdgpu_dm_connector(connector); + struct drm_dp_mst_port *port = amdgpu_dm_connector->port; + + return drm_dp_mst_connector_late_register(connector, port); +} + +static void +amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector) +{ + struct amdgpu_dm_connector *amdgpu_dm_connector = + to_amdgpu_dm_connector(connector); + struct drm_dp_mst_port *port = amdgpu_dm_connector->port; + + drm_dp_mst_connector_early_unregister(connector, port); +} + static const struct drm_connector_funcs dm_dp_mst_connector_funcs = { .detect = dm_dp_mst_detect, .fill_modes = drm_helper_probe_single_connector_modes, @@ -164,7 +184,9 @@ .atomic_duplicate_state = amdgpu_dm_connector_atomic_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_set_property = amdgpu_dm_connector_atomic_set_property, - .atomic_get_property = amdgpu_dm_connector_atomic_get_property + .atomic_get_property = amdgpu_dm_connector_atomic_get_property, + .late_register = amdgpu_dm_mst_connector_late_register, + .early_unregister = amdgpu_dm_mst_connector_early_unregister, }; static int dm_dp_mst_get_modes(struct drm_connector *connector) @@ -388,7 +410,7 @@ struct amdgpu_dm_connector *aconnector) { aconnector->dm_dp_aux.aux.name = "dmdc"; - aconnector->dm_dp_aux.aux.dev = dm->adev->dev; + aconnector->dm_dp_aux.aux.dev = aconnector->base.kdev; aconnector->dm_dp_aux.aux.transfer = dm_dp_aux_transfer; aconnector->dm_dp_aux.ddc_service = aconnector->dc_link->ddc; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c
linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_pp_smu.c 2019-08-31 15:01:11.855736168 -0500 @@ -151,18 +151,31 @@ static enum smu_clk_type dc_to_smu_clock_type( enum dm_pp_clock_type dm_pp_clk_type) { -#define DCCLK_MAP_SMUCLK(dcclk, smuclk) \ - [dcclk] = smuclk + enum smu_clk_type smu_clk_type = SMU_CLK_COUNT; - static int dc_clk_type_map[] = { - DCCLK_MAP_SMUCLK(DM_PP_CLOCK_TYPE_DISPLAY_CLK, SMU_DISPCLK), - DCCLK_MAP_SMUCLK(DM_PP_CLOCK_TYPE_ENGINE_CLK, SMU_GFXCLK), - DCCLK_MAP_SMUCLK(DM_PP_CLOCK_TYPE_MEMORY_CLK, SMU_MCLK), - DCCLK_MAP_SMUCLK(DM_PP_CLOCK_TYPE_DCEFCLK, SMU_DCEFCLK), - DCCLK_MAP_SMUCLK(DM_PP_CLOCK_TYPE_SOCCLK, SMU_SOCCLK), - }; + switch (dm_pp_clk_type) { + case DM_PP_CLOCK_TYPE_DISPLAY_CLK: + smu_clk_type = SMU_DISPCLK; + break; + case DM_PP_CLOCK_TYPE_ENGINE_CLK: + smu_clk_type = SMU_GFXCLK; + break; + case DM_PP_CLOCK_TYPE_MEMORY_CLK: + smu_clk_type = SMU_MCLK; + break; + case DM_PP_CLOCK_TYPE_DCEFCLK: + smu_clk_type = SMU_DCEFCLK; + break; + case DM_PP_CLOCK_TYPE_SOCCLK: + smu_clk_type = SMU_SOCCLK; + break; + default: + DRM_ERROR("DM_PPLIB: invalid clock type: %d!\n", + dm_pp_clk_type); + break; + } - return dc_clk_type_map[dm_pp_clk_type]; + return smu_clk_type; } static enum amd_pp_clock_type dc_to_pp_clock_type( @@ -334,7 +347,7 @@ } } else if (adev->smu.funcs && adev->smu.funcs->get_clock_by_type) { if (smu_get_clock_by_type(&adev->smu, - dc_to_smu_clock_type(clk_type), + dc_to_pp_clock_type(clk_type), &pp_clks)) { get_default_clock_levels(clk_type, dc_clks); return true; @@ -419,7 +432,7 @@ return false; } else if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->get_clock_by_type_with_latency) { if (smu_get_clock_by_type_with_latency(&adev->smu, - dc_to_pp_clock_type(clk_type), + dc_to_smu_clock_type(clk_type), &pp_clks)) return false; } @@ -801,6 +814,19 @@ return PP_SMU_RESULT_OK; } +enum pp_smu_status pp_nv_set_pstate_handshake_support( + struct pp_smu *pp, BOOLEAN pstate_handshake_supported) +{ + const struct dc_context *ctx = pp->dm; + struct amdgpu_device *adev = ctx->driver_context; + struct smu_context *smu = &adev->smu; + + if (smu_display_disable_memory_clock_switch(smu, !pstate_handshake_supported)) + return PP_SMU_RESULT_FAIL; + + return PP_SMU_RESULT_OK; +} + enum pp_smu_status pp_nv_set_voltage_by_freq(struct pp_smu *pp, enum pp_smu_nv_clock_id clock_id, int mhz) { @@ -916,6 +942,7 @@ funcs->nv_funcs.get_maximum_sustainable_clocks = pp_nv_get_maximum_sustainable_clocks; /*todo compare data with window driver */ funcs->nv_funcs.get_uclk_dpm_states = pp_nv_get_uclk_dpm_states; + funcs->nv_funcs.set_pstate_handshake_support = pp_nv_set_pstate_handshake_support; break; #endif default: diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c 2019-08-31 15:01:11.855736168 -0500 @@ -1881,8 +1881,6 @@ .get_device_tag = bios_parser_get_device_tag, - .get_firmware_info = bios_parser_get_firmware_info, - .get_spread_spectrum_info = bios_parser_get_spread_spectrum_info, .get_ss_entry_number = bios_parser_get_ss_entry_number, @@ -1998,6 +1996,7 @@ 
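/*
 * [Editor's note, not part of the patch: the dc_to_smu_clock_type() rework in
 * the pp_smu hunk above replaces a designated-initializer lookup table with a
 * switch. The old table silently returned 0 (or read out of bounds) for clock
 * types it never mapped; the switch routes every unhandled type to an explicit
 * sentinel, and the real code also logs it with DRM_ERROR. The shape of the
 * fix, with hypothetical enums:]
 */
enum sketch_clk_in  { SKETCH_CLK_DISP, SKETCH_CLK_MEM, SKETCH_CLK_IN_COUNT };
enum sketch_clk_out { SKETCH_OUT_DISP, SKETCH_OUT_MEM, SKETCH_OUT_COUNT };

static enum sketch_clk_out sketch_map_clk(enum sketch_clk_in in)
{
	switch (in) {
	case SKETCH_CLK_DISP:
		return SKETCH_OUT_DISP;
	case SKETCH_CLK_MEM:
		return SKETCH_OUT_MEM;
	default:
		return SKETCH_OUT_COUNT;	/* explicit "invalid" sentinel */
	}
}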
dal_bios_parser_init_cmd_tbl_helper2(&bp->cmd_helper, dce_version); bp->base.integrated_info = bios_parser_create_integrated_info(&bp->base); + bp->base.fw_info_valid = bios_parser_get_firmware_info(&bp->base, &bp->base.fw_info) == BP_RESULT_OK; return true; } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c 2019-08-31 15:01:11.855736168 -0500 @@ -2796,8 +2796,6 @@ .get_device_tag = bios_parser_get_device_tag, - .get_firmware_info = bios_parser_get_firmware_info, - .get_spread_spectrum_info = bios_parser_get_spread_spectrum_info, .get_ss_entry_number = bios_parser_get_ss_entry_number, @@ -2922,6 +2920,7 @@ dal_bios_parser_init_cmd_tbl_helper(&bp->cmd_helper, dce_version); bp->base.integrated_info = bios_parser_create_integrated_info(&bp->base); + bp->base.fw_info_valid = bios_parser_get_firmware_info(&bp->base, &bp->base.fw_info) == BP_RESULT_OK; return true; } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c 2019-08-31 15:01:11.856736168 -0500 @@ -67,6 +67,11 @@ *h = dal_cmd_tbl_helper_dce112_get_table2(); return true; #endif +#if defined(CONFIG_DRM_AMD_DC_DCN2_1) + case DCN_VERSION_2_1: + *h = dal_cmd_tbl_helper_dce112_get_table2(); + return true; +#endif case DCE_VERSION_12_0: case DCE_VERSION_12_1: *h = dal_cmd_tbl_helper_dce112_get_table2(); diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/bios/dce110/command_table_helper_dce110.c 2019-08-31 15:01:11.856736168 -0500 @@ -153,38 +153,10 @@ static uint8_t dig_encoder_sel_to_atom(enum engine_id id) { - uint8_t atom_dig_encoder_sel = 0; - - switch (id) { - case ENGINE_ID_DIGA: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V5__DIGA_SEL; - break; - case ENGINE_ID_DIGB: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V5__DIGB_SEL; - break; - case ENGINE_ID_DIGC: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V5__DIGC_SEL; - break; - case ENGINE_ID_DIGD: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V5__DIGD_SEL; - break; - case ENGINE_ID_DIGE: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V5__DIGE_SEL; - break; - case ENGINE_ID_DIGF: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V5__DIGF_SEL; - break; - case ENGINE_ID_DIGG: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V5__DIGG_SEL; - break; - case ENGINE_ID_UNKNOWN: - /* No DIG_FRONT is associated to DIG_BACKEND */ - atom_dig_encoder_sel = 0; - break; - default: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V5__DIGA_SEL; - break; - } + /* On any ASIC after DCE80, we manually program the DIG_FE + * selection (see connect_dig_be_to_fe function of the link + * encoder), so translation should always return 0 (no FE). 
+ */ return 0; } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper2_dce112.c 2019-08-31 15:01:11.856736168 -0500 @@ -150,38 +150,10 @@ static uint8_t dig_encoder_sel_to_atom(enum engine_id id) { - uint8_t atom_dig_encoder_sel = 0; - - switch (id) { - case ENGINE_ID_DIGA: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGA_SEL; - break; - case ENGINE_ID_DIGB: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGB_SEL; - break; - case ENGINE_ID_DIGC: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGC_SEL; - break; - case ENGINE_ID_DIGD: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGD_SEL; - break; - case ENGINE_ID_DIGE: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGE_SEL; - break; - case ENGINE_ID_DIGF: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGF_SEL; - break; - case ENGINE_ID_DIGG: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGG_SEL; - break; - case ENGINE_ID_UNKNOWN: - /* No DIG_FRONT is associated to DIG_BACKEND */ - atom_dig_encoder_sel = 0; - break; - default: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGA_SEL; - break; - } + /* On any ASIC after DCE80, we manually program the DIG_FE + * selection (see connect_dig_be_to_fe function of the link + * encoder), so translation should always return 0 (no FE). + */ return 0; } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/bios/dce112/command_table_helper_dce112.c 2019-08-31 15:01:11.856736168 -0500 @@ -150,38 +150,10 @@ static uint8_t dig_encoder_sel_to_atom(enum engine_id id) { - uint8_t atom_dig_encoder_sel = 0; - - switch (id) { - case ENGINE_ID_DIGA: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGA_SEL; - break; - case ENGINE_ID_DIGB: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGB_SEL; - break; - case ENGINE_ID_DIGC: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGC_SEL; - break; - case ENGINE_ID_DIGD: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGD_SEL; - break; - case ENGINE_ID_DIGE: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGE_SEL; - break; - case ENGINE_ID_DIGF: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGF_SEL; - break; - case ENGINE_ID_DIGG: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGG_SEL; - break; - case ENGINE_ID_UNKNOWN: - /* No DIG_FRONT is associated to DIG_BACKEND */ - atom_dig_encoder_sel = 0; - break; - default: - atom_dig_encoder_sel = ATOM_TRANMSITTER_V6__DIGA_SEL; - break; - } + /* On any ASIC after DCE80, we manually program the DIG_FE + * selection (see connect_dig_be_to_fe function of the link + * encoder), so translation should always return 0 (no FE). 
+ */ return 0; } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c 2019-08-31 15:01:11.856736168 -0500 @@ -25,6 +25,7 @@ #include +#include "resource.h" #include "dm_services.h" #include "dce_calcs.h" #include "dc.h" @@ -2852,7 +2853,7 @@ data->src_height[num_displays * 2 + j] = bw_int_to_fixed(pipe[i].bottom_pipe->plane_res.scl_data.viewport.height); data->src_width[num_displays * 2 + j] = bw_int_to_fixed(pipe[i].bottom_pipe->plane_res.scl_data.viewport.width); data->pitch_in_pixels[num_displays * 2 + j] = bw_int_to_fixed( - pipe[i].bottom_pipe->plane_state->plane_size.grph.surface_pitch); + pipe[i].bottom_pipe->plane_state->plane_size.surface_pitch); data->h_taps[num_displays * 2 + j] = bw_int_to_fixed(pipe[i].bottom_pipe->plane_res.scl_data.taps.h_taps); data->v_taps[num_displays * 2 + j] = bw_int_to_fixed(pipe[i].bottom_pipe->plane_res.scl_data.taps.v_taps); data->h_scale_ratio[num_displays * 2 + j] = fixed31_32_to_bw_fixed( @@ -2977,6 +2978,32 @@ data->number_of_displays = num_displays; } +static bool all_displays_in_sync(const struct pipe_ctx pipe[], + int pipe_count) +{ + const struct pipe_ctx *active_pipes[MAX_PIPES]; + int i, num_active_pipes = 0; + + for (i = 0; i < pipe_count; i++) { + if (!pipe[i].stream || pipe[i].top_pipe) + continue; + + active_pipes[num_active_pipes++] = &pipe[i]; + } + + if (!num_active_pipes) + return false; + + for (i = 1; i < num_active_pipes; ++i) { + if (!resource_are_streams_timing_synchronizable( + active_pipes[0]->stream, active_pipes[i]->stream)) { + return false; + } + } + + return true; +} + /** * Return: * true - Display(s) configuration supported. @@ -2998,8 +3025,10 @@ populate_initial_data(pipe, pipe_count, data); - /*TODO: this should be taken out calcs output and assigned during timing sync for pplib use*/ - calcs_output->all_displays_in_sync = false; + if (ctx->dc->config.multi_mon_pp_mclk_switch) + calcs_output->all_displays_in_sync = all_displays_in_sync(pipe, pipe_count); + else + calcs_output->all_displays_in_sync = false; if (data->number_of_displays != 0) { uint8_t yclk_lvl, sclk_lvl; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c 2019-08-31 15:01:11.857736168 -0500 @@ -329,7 +329,7 @@ dcc_support_pixel_format(pipe->plane_state->format, &bpe) ? 
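/*
 * [Editor's note, not part of the patch: the all_displays_in_sync() helper
 * added in the dce_calcs hunk above treats "synchronizable" as transitive:
 * every active stream is compared against the first one only, so N streams
 * need N-1 checks rather than all pairs. Generic form, with the comparison
 * callback as a stand-in for resource_are_streams_timing_synchronizable():]
 */
#include <stdbool.h>
#include <stddef.h>

static bool sketch_all_in_sync(const int *timings, size_t n,
			       bool (*synchronizable)(int, int))
{
	size_t i;

	if (n == 0)
		return false;	/* mirrors the !num_active_pipes case */

	for (i = 1; i < n; i++)
		if (!synchronizable(timings[0], timings[i]))
			return false;

	return true;
}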
1 : 0; } input->src.dcc_rate = 1; - input->src.meta_pitch = pipe->plane_state->dcc.grph.meta_pitch; + input->src.meta_pitch = pipe->plane_state->dcc.meta_pitch; input->src.source_scan = dm_horz; input->src.sw_mode = pipe->plane_state->tiling_info.gfx9.swizzle; @@ -705,6 +705,13 @@ hack_force_pipe_split(v, context->streams[0]->timing.pix_clk_100hz); } + +unsigned int get_highest_allowed_voltage_level(uint32_t hw_internal_rev) +{ + /* we are ok with all levels */ + return 4; +} + bool dcn_validate_bandwidth( struct dc *dc, struct dc_state *context, @@ -732,6 +739,7 @@ memset(v, 0, sizeof(*v)); kernel_fpu_begin(); + v->sr_exit_time = dc->dcn_soc->sr_exit_time; v->sr_enter_plus_exit_time = dc->dcn_soc->sr_enter_plus_exit_time; v->urgent_latency = dc->dcn_soc->urgent_latency; @@ -1268,7 +1276,7 @@ PERFORMANCE_TRACE_END(); BW_VAL_TRACE_FINISH(); - if (bw_limit_pass && v->voltage_level != 5) + if (bw_limit_pass && v->voltage_level <= get_highest_allowed_voltage_level(dc->ctx->asic_id.hw_internal_rev)) return true; else return false; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/calcs/Makefile linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/calcs/Makefile --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/calcs/Makefile 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/calcs/Makefile 2019-08-31 15:01:11.856736168 -0500 @@ -32,6 +32,10 @@ calcs_ccflags := -mhard-float -msse $(cc_stack_align) +ifdef CONFIG_CC_IS_CLANG +calcs_ccflags += -msse2 +endif + CFLAGS_dcn_calcs.o := $(calcs_ccflags) CFLAGS_dcn_calc_auto.o := $(calcs_ccflags) CFLAGS_dcn_calc_math.o := $(calcs_ccflags) -Wno-tautological-compare diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c 2019-08-31 15:01:11.857736168 -0500 @@ -37,6 +37,9 @@ #include "dcn10/rv1_clk_mgr.h" #include "dcn10/rv2_clk_mgr.h" #include "dcn20/dcn20_clk_mgr.h" +#if defined(CONFIG_DRM_AMD_DC_DCN2_1) +#include "dcn21/rn_clk_mgr.h" +#endif int clk_mgr_helper_get_active_display_cnt( @@ -108,6 +111,12 @@ #if defined(CONFIG_DRM_AMD_DC_DCN1_0) case FAMILY_RV: +#if defined(CONFIG_DRM_AMD_DC_DCN2_1) + if (ASICREV_IS_RENOIR(asic_id.hw_internal_rev)) { + rn_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg); + break; + } +#endif /* DCN2_1 */ if (ASICREV_IS_RAVEN2(asic_id.hw_internal_rev)) { rv2_clk_mgr_construct(ctx, clk_mgr, pp_smu); break; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/clk_mgr/dce100/dce_clk_mgr.c 2019-08-31 15:01:11.857736168 -0500 @@ -273,18 +273,12 @@ { struct dc_debug_options *debug = &clk_mgr_dce->base.ctx->dc->debug; struct dc_bios *bp = clk_mgr_dce->base.ctx->dc_bios; - struct integrated_info info = { { { 0 } } }; - struct dc_firmware_info fw_info = { { 0 } }; int i; if (bp->integrated_info) - info = *bp->integrated_info; - - clk_mgr_dce->dentist_vco_freq_khz = info.dentist_vco_freq; + clk_mgr_dce->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq; if (clk_mgr_dce->dentist_vco_freq_khz == 0) { - bp->funcs->get_firmware_info(bp, 
&fw_info); - clk_mgr_dce->dentist_vco_freq_khz = - fw_info.smu_gpu_pll_output_freq; + clk_mgr_dce->dentist_vco_freq_khz = bp->fw_info.smu_gpu_pll_output_freq; if (clk_mgr_dce->dentist_vco_freq_khz == 0) clk_mgr_dce->dentist_vco_freq_khz = 3600000; } @@ -317,9 +311,10 @@ /*Do not allow bad VBIOS/SBIOS to override with invalid values, * check for > 100MHz*/ - if (info.disp_clk_voltage[i].max_supported_clk >= 100000) - clk_mgr_dce->max_clks_by_state[clk_state].display_clk_khz = - info.disp_clk_voltage[i].max_supported_clk; + if (bp->integrated_info) + if (bp->integrated_info->disp_clk_voltage[i].max_supported_clk >= 100000) + clk_mgr_dce->max_clks_by_state[clk_state].display_clk_khz = + bp->integrated_info->disp_clk_voltage[i].max_supported_clk; } if (!debug->disable_dfs_bypass && bp->integrated_info) diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn10/rv1_clk_mgr.c 2019-08-31 15:01:11.857736168 -0500 @@ -34,6 +34,11 @@ #include "rv1_clk_mgr_vbios_smu.h" #include "rv1_clk_mgr_clk.h" +void rv1_init_clocks(struct clk_mgr *clk_mgr) +{ + memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); +} + static int rv1_determine_dppclk_threshold(struct clk_mgr_internal *clk_mgr, struct dc_clocks *new_clocks) { bool request_dpp_div = new_clocks->dispclk_khz > new_clocks->dppclk_khz; @@ -232,6 +237,7 @@ } static struct clk_mgr_funcs rv1_clk_funcs = { + .init_clocks = rv1_init_clocks, .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = rv1_update_clocks, .enable_pme_wa = rv1_enable_pme_wa, @@ -246,7 +252,6 @@ { struct dc_debug_options *debug = &ctx->dc->debug; struct dc_bios *bp = ctx->dc_bios; - struct dc_firmware_info fw_info = { { 0 } }; clk_mgr->base.ctx = ctx; clk_mgr->pp_smu = pp_smu; @@ -262,9 +267,8 @@ if (bp->integrated_info) clk_mgr->dentist_vco_freq_khz = bp->integrated_info->dentist_vco_freq; - if (clk_mgr->dentist_vco_freq_khz == 0) { - bp->funcs->get_firmware_info(bp, &fw_info); - clk_mgr->dentist_vco_freq_khz = fw_info.smu_gpu_pll_output_freq; + if (bp->fw_info_valid && clk_mgr->dentist_vco_freq_khz == 0) { + clk_mgr->dentist_vco_freq_khz = bp->fw_info.smu_gpu_pll_output_freq; if (clk_mgr->dentist_vco_freq_khz == 0) clk_mgr->dentist_vco_freq_khz = 3600000; } diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c 2019-08-31 15:01:11.857736168 -0500 @@ -26,8 +26,6 @@ #include "dccg.h" #include "clk_mgr_internal.h" - -#include "dcn20/dcn20_clk_mgr.h" #include "dce100/dce_clk_mgr.h" #include "reg_helper.h" #include "core_types.h" @@ -106,7 +104,6 @@ { int i; - clk_mgr->dccg->ref_dppclk = clk_mgr->base.clks.dppclk_khz; for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { int dpp_inst, dppclk_khz; @@ -116,28 +113,75 @@ dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst; dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; clk_mgr->dccg->funcs->update_dpp_dto( - clk_mgr->dccg, dpp_inst, dppclk_khz); + clk_mgr->dccg, dpp_inst, dppclk_khz, 
false); } } -void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr) +static void update_global_dpp_clk(struct clk_mgr_internal *clk_mgr, unsigned int khz) { int dpp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->dentist_vco_freq_khz / clk_mgr->base.clks.dppclk_khz; - int disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR - * clk_mgr->dentist_vco_freq_khz / clk_mgr->base.clks.dispclk_khz; + * clk_mgr->dentist_vco_freq_khz / khz; uint32_t dppclk_wdivider = dentist_get_did_from_divider(dpp_divider); - uint32_t dispclk_wdivider = dentist_get_did_from_divider(disp_divider); REG_UPDATE(DENTIST_DISPCLK_CNTL, - DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider); -// REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 5, 100); - REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_WDIVIDER, dppclk_wdivider); REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_CHG_DONE, 1, 5, 100); } +static void update_display_clk(struct clk_mgr_internal *clk_mgr, unsigned int khz) +{ + int disp_divider = DENTIST_DIVIDER_RANGE_SCALE_FACTOR + * clk_mgr->dentist_vco_freq_khz / khz; + + uint32_t dispclk_wdivider = dentist_get_did_from_divider(disp_divider); + + REG_UPDATE(DENTIST_DISPCLK_CNTL, + DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider); +} + +static void request_voltage_and_program_disp_clk(struct clk_mgr *clk_mgr_base, unsigned int khz) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct dc *dc = clk_mgr_base->ctx->dc; + struct pp_smu_funcs_nv *pp_smu = NULL; + bool going_up = clk_mgr->base.clks.dispclk_khz < khz; + + if (dc->res_pool->pp_smu) + pp_smu = &dc->res_pool->pp_smu->nv_funcs; + + clk_mgr->base.clks.dispclk_khz = khz; + + if (going_up && pp_smu && pp_smu->set_voltage_by_freq) + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, clk_mgr_base->clks.dispclk_khz / 1000); + + update_display_clk(clk_mgr, khz); + + if (!going_up && pp_smu && pp_smu->set_voltage_by_freq) + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, clk_mgr_base->clks.dispclk_khz / 1000); +} + +static void request_voltage_and_program_global_dpp_clk(struct clk_mgr *clk_mgr_base, unsigned int khz) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct dc *dc = clk_mgr_base->ctx->dc; + struct pp_smu_funcs_nv *pp_smu = NULL; + bool going_up = clk_mgr->base.clks.dppclk_khz < khz; + + if (dc->res_pool->pp_smu) + pp_smu = &dc->res_pool->pp_smu->nv_funcs; + + clk_mgr->base.clks.dppclk_khz = khz; + clk_mgr->dccg->ref_dppclk = khz; + + if (going_up && pp_smu && pp_smu->set_voltage_by_freq) + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PIXELCLK, clk_mgr_base->clks.dppclk_khz / 1000); + + update_global_dpp_clk(clk_mgr, khz); + + if (!going_up && pp_smu && pp_smu->set_voltage_by_freq) + pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PIXELCLK, clk_mgr_base->clks.dppclk_khz / 1000); +} void dcn2_update_clocks(struct clk_mgr *clk_mgr_base, struct dc_state *context, @@ -148,12 +192,21 @@ struct dc *dc = clk_mgr_base->ctx->dc; struct pp_smu_funcs_nv *pp_smu = NULL; int display_count; - bool update_dppclk = false; bool update_dispclk = false; bool enter_display_off = false; - bool dpp_clock_lowered = false; struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; + bool force_reset = false; + int i; + + if (dc->work_arounds.skip_clock_update) + return; + if (clk_mgr_base->clks.dispclk_khz == 0 || + dc->debug.force_clock_mode & 0x1) { + // This is from resume or boot up; if the forced_clock cfg option is used, we bypass programming dispclk and DPPCLK, but we need to set them for S3. + force_reset = true; + // force_clock_mode 0x1: force reset the clock even if it is the same clock, as long as it is in the Passive level. + } display_count = clk_mgr_helper_get_active_display_cnt(dc, context); if (dc->res_pool->pp_smu) pp_smu = &dc->res_pool->pp_smu->nv_funcs; @@ -172,6 +225,7 @@ pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PHYCLK, clk_mgr_base->clks.phyclk_khz / 1000); } + if (dc->debug.force_min_dcfclk_mhz > 0) new_clocks->dcfclk_khz = (new_clocks->dcfclk_khz > (dc->debug.force_min_dcfclk_mhz * 1000)) ? new_clocks->dcfclk_khz : (dc->debug.force_min_dcfclk_mhz * 1000); @@ -196,10 +250,13 @@ } if (should_update_pstate_support(safe_to_lower, new_clocks->p_state_change_support, clk_mgr_base->clks.p_state_change_support)) { + clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; + clk_mgr_base->clks.p_state_change_support = new_clocks->p_state_change_support; if (pp_smu && pp_smu->set_pstate_handshake_support) pp_smu->set_pstate_handshake_support(&pp_smu->pp_smu, clk_mgr_base->clks.p_state_change_support); } + clk_mgr_base->clks.prev_p_state_change_support = clk_mgr_base->clks.p_state_change_support; if (should_set_clock(safe_to_lower, new_clocks->dramclk_khz, clk_mgr_base->clks.dramclk_khz)) { clk_mgr_base->clks.dramclk_khz = new_clocks->dramclk_khz; @@ -207,35 +264,48 @@ pp_smu->set_hard_min_uclk_by_freq(&pp_smu->pp_smu, clk_mgr_base->clks.dramclk_khz / 1000); } - if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { - if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz) - dpp_clock_lowered = true; - clk_mgr->base.clks.dppclk_khz = new_clocks->dppclk_khz; + if (dc->config.forced_clocks == false) { + // First update display clock + if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) + request_voltage_and_program_disp_clk(clk_mgr_base, new_clocks->dispclk_khz); + + // Updating DPP clock requires some more logic + if (!safe_to_lower) { + // For pre-programming, any DPP clock that will go up must be raised now + + // First raise the global reference if needed + if (new_clocks->dppclk_khz > clk_mgr_base->clks.dppclk_khz) + request_voltage_and_program_global_dpp_clk(clk_mgr_base, new_clocks->dppclk_khz); + + // Then raise any dividers that need raising + for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { + int dpp_inst, dppclk_khz; - if (pp_smu && pp_smu->set_voltage_by_freq) - pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_PIXELCLK, clk_mgr_base->clks.dppclk_khz / 1000); + if (!context->res_ctx.pipe_ctx[i].plane_state) + continue; - update_dppclk = true; - } + dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst; + dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; - if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { - clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; - if (pp_smu && pp_smu->set_voltage_by_freq) - pp_smu->set_voltage_by_freq(&pp_smu->pp_smu, PP_SMU_NV_DISPCLK, clk_mgr_base->clks.dispclk_khz / 1000); - - update_dispclk = true; - } - if (dc->config.forced_clocks == false) { - if (dpp_clock_lowered) { - // if clock is being lowered, increase DTO before lowering refclk - dcn20_update_clocks_update_dpp_dto(clk_mgr, context); - dcn20_update_clocks_update_dentist(clk_mgr); + clk_mgr->dccg->funcs->update_dpp_dto(clk_mgr->dccg, dpp_inst, dppclk_khz, true); + } } else { - // if clock is being
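/*
 * [Editor's note, not part of the patch: the safe_to_lower split above
 * follows DC's usual two-phase rule: increases are applied before the state
 * commit, decreases only afterwards when lowering is safe, and DTOs are
 * dropped before the refclk that feeds them. A stand-in for the
 * should_set_clock() predicate this code keys off:]
 */
#include <stdbool.h>

static bool sketch_should_set_clock(bool safe_to_lower, int calc_khz,
				    int cur_khz)
{
	/* raising is always allowed; lowering only once it is safe */
	return (safe_to_lower && calc_khz < cur_khz) || calc_khz > cur_khz;
}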
raised, increase refclk before lowering DTO - if (update_dppclk || update_dispclk) - dcn20_update_clocks_update_dentist(clk_mgr); - if (update_dppclk) - dcn20_update_clocks_update_dpp_dto(clk_mgr, context); + // For post-programming, we can lower ref clk if needed, and unconditionally set all the DTOs + + if (new_clocks->dppclk_khz < clk_mgr_base->clks.dppclk_khz) + request_voltage_and_program_global_dpp_clk(clk_mgr_base, new_clocks->dppclk_khz); + + for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) { + int dpp_inst, dppclk_khz; + + if (!context->res_ctx.pipe_ctx[i].plane_state) + continue; + + dpp_inst = context->res_ctx.pipe_ctx[i].plane_res.dpp->inst; + dppclk_khz = context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz; + + clk_mgr->dccg->funcs->update_dpp_dto(clk_mgr->dccg, dpp_inst, dppclk_khz, false); + } } } if (update_dispclk && @@ -303,6 +373,7 @@ memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks)); // Assumption is that boot state always supports pstate clk_mgr->clks.p_state_change_support = true; + clk_mgr->clks.prev_p_state_change_support = true; } void dcn2_enable_pme_wa(struct clk_mgr *clk_mgr_base) @@ -318,11 +389,32 @@ } } +void dcn2_get_clock(struct clk_mgr *clk_mgr, + struct dc_state *context, + enum dc_clock_type clock_type, + struct dc_clock_config *clock_cfg) +{ + + if (clock_type == DC_CLOCK_TYPE_DISPCLK) { + clock_cfg->max_clock_khz = context->bw_ctx.bw.dcn.clk.max_supported_dispclk_khz; + clock_cfg->min_clock_khz = DCN_MINIMUM_DISPCLK_Khz; + clock_cfg->current_clock_khz = clk_mgr->clks.dispclk_khz; + clock_cfg->bw_requirequired_clock_khz = context->bw_ctx.bw.dcn.clk.bw_dispclk_khz; + } + if (clock_type == DC_CLOCK_TYPE_DPPCLK) { + clock_cfg->max_clock_khz = context->bw_ctx.bw.dcn.clk.max_supported_dppclk_khz; + clock_cfg->min_clock_khz = DCN_MINIMUM_DPPCLK_Khz; + clock_cfg->current_clock_khz = clk_mgr->clks.dppclk_khz; + clock_cfg->bw_requirequired_clock_khz = context->bw_ctx.bw.dcn.clk.bw_dppclk_khz; + } +} + static struct clk_mgr_funcs dcn2_funcs = { .get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz, .update_clocks = dcn2_update_clocks, .init_clocks = dcn2_init_clocks, - .enable_pme_wa = dcn2_enable_pme_wa + .enable_pme_wa = dcn2_enable_pme_wa, + .get_clock = dcn2_get_clock, }; diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h 2019-08-25 14:01:23.000000000 -0500 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.h 2019-08-31 15:01:11.857736168 -0500 @@ -45,4 +45,9 @@ uint32_t dentist_get_did_from_divider(int divider); +void dcn2_get_clock(struct clk_mgr *clk_mgr, + struct dc_state *context, + enum dc_clock_type clock_type, + struct dc_clock_config *clock_cfg); + #endif //__DCN20_CLK_MGR_H__ diff -Naur linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c --- linux-5.3-rc6/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c 1969-12-31 18:00:00.000000000 -0600 +++ linux-5.3-rc6-agd5fed/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c 2019-08-31 15:01:11.857736168 -0500 @@ -0,0 +1,590 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: AMD + * + */ + +#include "dccg.h" +#include "clk_mgr_internal.h" + + +#include "dcn20/dcn20_clk_mgr.h" +#include "rn_clk_mgr.h" + + +#include "dce100/dce_clk_mgr.h" +#include "rn_clk_mgr_vbios_smu.h" +#include "reg_helper.h" +#include "core_types.h" +#include "dm_helpers.h" + +#include "atomfirmware.h" +#include "clk/clk_10_0_2_offset.h" +#include "clk/clk_10_0_2_sh_mask.h" +#include "renoir_ip_offset.h" + + +/* Constants */ + +#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 Training Counter Requirement doc */ + +/* Macros */ + +#define REG(reg_name) \ + (CLK_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) + +void rn_update_clocks(struct clk_mgr *clk_mgr_base, + struct dc_state *context, + bool safe_to_lower) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk; + struct dc *dc = clk_mgr_base->ctx->dc; + int display_count; + bool update_dppclk = false; + bool update_dispclk = false; + bool enter_display_off = false; + bool dpp_clock_lowered = false; + struct dmcu *dmcu = clk_mgr_base->ctx->dc->res_pool->dmcu; + + display_count = clk_mgr_helper_get_active_display_cnt(dc, context); + + if (display_count == 0) + enter_display_off = true; + + if (enter_display_off == safe_to_lower) { + rn_vbios_smu_set_display_count(clk_mgr, display_count); + } + + if (should_set_clock(safe_to_lower, new_clocks->phyclk_khz, clk_mgr_base->clks.phyclk_khz)) { + clk_mgr_base->clks.phyclk_khz = new_clocks->phyclk_khz; + rn_vbios_smu_set_phyclk(clk_mgr, clk_mgr_base->clks.phyclk_khz); + } + + if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) { + clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz; + rn_vbios_smu_set_hard_min_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_khz); + } + + if (should_set_clock(safe_to_lower, + new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) { + clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz; + rn_vbios_smu_set_min_deep_sleep_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_deep_sleep_khz); + } + + if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) { + if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz) + dpp_clock_lowered = true; + clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz; + update_dppclk = true; + } + + if 
(should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) { + clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz; + rn_vbios_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz); + + update_dispclk = true; + } + + if (dpp_clock_lowered) { + // if clock is being lowered, increase DTO before lowering refclk + dcn20_update_clocks_update_dpp_dto(clk_mgr, context); + rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); + } else { + // if clock is being raised, increase refclk before lowering DTO + if (update_dppclk || update_dispclk) + rn_vbios_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz); + if (update_dppclk) + dcn20_update_clocks_update_dpp_dto(clk_mgr, context); + } + + if (update_dispclk && + dmcu && dmcu->funcs->is_dmcu_initialized(dmcu)) { + /*update dmcu for wait_loop count*/ + dmcu->funcs->set_psr_wait_loop(dmcu, + clk_mgr_base->clks.dispclk_khz / 1000 / 7); + } +} + + +static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) +{ + /* get FbMult value */ + struct fixed31_32 pll_req; + unsigned int fbmult_frac_val = 0; + unsigned int fbmult_int_val = 0; + + + /* + * Register value of fbmult is in 8.16 format, we are converting to 31.32 + * to leverage the fixed-point operations available in the driver + */ + + REG_GET(CLK1_CLK_PLL_REQ, FbMult_frac, &fbmult_frac_val); /* 16 bit fractional part*/ + REG_GET(CLK1_CLK_PLL_REQ, FbMult_int, &fbmult_int_val); /* 8 bit integer part */ + + pll_req = dc_fixpt_from_int(fbmult_int_val); + + /* + * since the fractional part is only 16 bit in the register definition but is 32 bit + * in our fixed-point definition, we need to shift left by 16 to obtain the correct value + */ + pll_req.value |= fbmult_frac_val << 16; + + /* multiply by the REFCLK frequency */ + pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz); + + /* integer part is now VCO frequency in kHz */ + return dc_fixpt_floor(pll_req); +} + +static void rn_dump_clk_registers_internal(struct rn_clk_internal *internal, struct clk_mgr *clk_mgr_base) +{ + struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base); + + internal->CLK1_CLK3_CURRENT_CNT = REG_READ(CLK1_CLK3_CURRENT_CNT); + internal->CLK1_CLK3_BYPASS_CNTL = REG_READ(CLK1_CLK3_BYPASS_CNTL); + + internal->CLK1_CLK3_DS_CNTL = REG_READ(CLK1_CLK3_DS_CNTL); //dcf deep sleep divider + internal->CLK1_CLK3_ALLOW_DS = REG_READ(CLK1_CLK3_ALLOW_DS); + + internal->CLK1_CLK1_CURRENT_CNT = REG_READ(CLK1_CLK1_CURRENT_CNT); + internal->CLK1_CLK1_BYPASS_CNTL = REG_READ(CLK1_CLK1_BYPASS_CNTL); + + internal->CLK1_CLK2_CURRENT_CNT = REG_READ(CLK1_CLK2_CURRENT_CNT); + internal->CLK1_CLK2_BYPASS_CNTL = REG_READ(CLK1_CLK2_BYPASS_CNTL); + + internal->CLK1_CLK0_CURRENT_CNT = REG_READ(CLK1_CLK0_CURRENT_CNT); + internal->CLK1_CLK0_BYPASS_CNTL = REG_READ(CLK1_CLK0_BYPASS_CNTL); +} + +/* This function collects raw clk register values */ +static void rn_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass, + struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info) +{ + struct rn_clk_internal internal = {0}; + char *bypass_clks[5] = {"0x0 DFS", "0x1 REFCLK", "0x2 ERROR", "0x3
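/*
 * [Editor's worked example, not part of the patch: the 8.16 to 31.32
 * fixed-point conversion in get_vco_frequency_from_reg() above, done with
 * plain 64-bit arithmetic. With FbMult_int = 100, FbMult_frac = 0x8000
 * (i.e. 0.5) and a 48000 kHz reference (example values, not hardware
 * defaults), the VCO is 100.5 * 48000 = 4824000 kHz.]
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t fbmult_int = 100;	/* 8-bit integer field */
	uint32_t fbmult_frac = 0x8000;	/* 16-bit fractional field */
	uint64_t ref_khz = 48000;

	/* build the 31.32 value: integer part in the high word, the 16-bit
	 * fraction shifted left by 16, exactly as pll_req is assembled */
	uint64_t pll_req = ((uint64_t)fbmult_int << 32) |
			   ((uint64_t)fbmult_frac << 16);

	/* multiply by the refclk, then floor back to an integer kHz value */
	uint64_t vco_khz = (pll_req * ref_khz) >> 32;

	printf("%llu kHz\n", (unsigned long long)vco_khz); /* 4824000 kHz */
	return 0;
}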